PR middle-end/78468
[official-gcc.git] / gcc / config / sparc / sparc.c
blob 890bde9fc0add5a04b9d4e9b5ac6c35a7c6ad392
/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2017 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "params.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "tree-vector-builder.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;
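
  /* A worked illustration of the formula above (not from the original
     sources): with the ultrasparc_costs table below, int_mul is
     COSTS_N_INSNS (4) and int_mul_bit_factor is 2, so a multiply whose
     rs1 operand has its highest set bit at position 11 is costed as
       COSTS_N_INSNS (4) + (11 - 3) / 2 = COSTS_N_INSNS (4) + 4.  */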

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;
};

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
};

static const struct processor_costs *sparc_costs = &cypress_costs;
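
/* The Cypress table is only the initial default: sparc_costs is repointed
   at the table matching the CPU selected with -mtune once options have
   been processed in sparc_option_override.  */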

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
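
/* An illustrative reading of the table above (not part of the original
   comments): in a leaf function the incoming registers %i0-%i5 (hard
   regnos 24-29) are rewritten to the outgoing registers %o0-%o5 (hard
   regnos 8-13) and %i7 (regno 31) to %o7 (regno 15), while the out and
   local registers themselves map to -1, i.e. they must not appear.  */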

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static rtx sparc_struct_value_rtx (tree, int);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_return_in_memory (const_tree, const_tree);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     machine_mode, const_tree, bool);
static void sparc_function_arg_advance (cumulative_args_t,
					machine_mode, const_tree, bool);
static rtx sparc_function_arg_1 (cumulative_args_t,
				 machine_mode, const_tree, bool, bool);
static rtx sparc_function_arg (cumulative_args_t,
			       machine_mode, const_tree, bool);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					machine_mode, const_tree, bool);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    machine_mode, tree, bool);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static rtx sparc_builtin_setjmp_frame_value (void);
static void sparc_conditional_register_usage (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       do_diagnostic, exclusions } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, NULL, false, NULL }
};
#endif

/* Option handling.  */

/* Parsed value.  */
enum cmodel sparc_cmodel;

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode

#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
#define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* True if any of INSN's source register(s) is REG.  */

static bool
insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
{
  extract_insn (insn);
  return ((REG_P (recog_data.operand[1])
	   && REGNO (recog_data.operand[1]) == reg)
	  || (recog_data.n_operands == 3
	      && REG_P (recog_data.operand[2])
	      && REGNO (recog_data.operand[2]) == reg));
}

/* True if INSN is a floating-point division or square-root.  */

static bool
div_sqrt_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a floating-point instruction.  */

static bool
fpop_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPMOVE:
    case TYPE_FPCMOVE:
    case TYPE_FP:
    case TYPE_FPCMP:
    case TYPE_FPMUL:
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is an atomic instruction.  */

static bool
atomic_insn_for_leon3_p (rtx_insn *insn)
{
  switch (INSN_CODE (insn))
    {
    case CODE_FOR_swapsi:
    case CODE_FOR_ldstub:
    case CODE_FOR_atomic_compare_and_swap_leon3_1:
      return true;
    default:
      return false;
    }
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN) \
  (NONDEBUG_INSN_P (INSN) \
   && GET_CODE (PATTERN (INSN)) != USE \
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
          && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
        {
          jump = seq->insn (0);
          insn = seq->insn (1);
        }
      else if (JUMP_P (insn))
        jump = insn;
      else
        jump = NULL;

      /* Place a NOP at the branch target of an integer branch if it is a
         floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
          && jump
          && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
        {
          rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
          if (target
              && (fpop_insn_p (target)
                  || (JUMP_P (target)
                      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
            emit_insn_before (gen_nop (), target);
        }

      /* Insert a NOP between load instruction and atomic instruction.  Insert
         a NOP at branch target if there is a load in delay slot and an atomic
         instruction at branch target.  */
      if (sparc_fix_ut700
          && NONJUMP_INSN_P (insn)
          && (set = single_set (insn)) != NULL_RTX
          && mem_ref (SET_SRC (set))
          && REG_P (SET_DEST (set)))
        {
          if (jump)
            {
              rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
              if (target && atomic_insn_for_leon3_p (target))
                emit_insn_before (gen_nop (), target);
            }

          next = next_active_insn (insn);
          if (!next)
            break;

          if (atomic_insn_for_leon3_p (next))
            insert_nop = true;
        }

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
         ends with another fdiv or fsqrt instruction with no dependencies on
         the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
          && NONJUMP_INSN_P (insn)
          && div_sqrt_insn_p (insn))
        {
          int i;
          int fp_found = 0;
          rtx_insn *after;

          const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

          next = next_active_insn (insn);
          if (!next)
            break;

          for (after = next, i = 0; i < 4; i++)
            {
              /* Count floating-point operations.  */
              if (i != 3 && fpop_insn_p (after))
                {
                  /* If the insn uses the destination register of
                     the div/sqrt, then it cannot be problematic.  */
                  if (insn_uses_reg_p (after, dest_reg))
                    break;
                  fp_found++;
                }

              /* Count floating-point loads.  */
              if (i != 3
                  && (set = single_set (after)) != NULL_RTX
                  && REG_P (SET_DEST (set))
                  && REGNO (SET_DEST (set)) > 31)
                {
                  /* If the insn uses the destination register of
                     the div/sqrt, then it cannot be problematic.  */
                  if (REGNO (SET_DEST (set)) == dest_reg)
                    break;
                  fp_found++;
                }

              /* Check if this is a problematic sequence.  */
              if (i > 1
                  && fp_found >= 2
                  && div_sqrt_insn_p (after))
                {
                  /* If this is the short version of the problematic
                     sequence we add two NOPs in a row to also prevent
                     the long version.  */
                  if (i == 2)
                    emit_insn_before (gen_nop (), next);
                  insert_nop = true;
                  break;
                }

              /* No need to scan past a second div/sqrt.  */
              if (div_sqrt_insn_p (after))
                break;

              /* Insert NOP before branch.  */
              if (i < 3
                  && (!NONJUMP_INSN_P (after)
                      || GET_CODE (PATTERN (after)) == SEQUENCE))
                {
                  insert_nop = true;
                  break;
                }

              after = next_active_insn (after);
              if (!after)
                break;
            }
        }

      /* Look for either of these two sequences:

         Sequence A:
         1. store of word size or less (e.g. st / stb / sth / stf)
         2. any single instruction that is not a load or store
         3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

         Sequence B:
         1. store of double word size (e.g. std / stdf)
         2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
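      /* As an illustration (not taken from the errata documents),
         sequence A would match:

             st  %g1, [%o0]     ! 1. single-word store
             add %g2, %g3, %g2  ! 2. neither load nor store
             st  %g4, [%o1]     ! 3. store

         in which case a NOP is emitted right after the first store.  */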
      if (sparc_fix_b2bst
          && NONJUMP_INSN_P (insn)
          && (set = single_set (insn)) != NULL_RTX
          && MEM_P (SET_DEST (set)))
        {
          /* Sequence B begins with a double-word store.  */
          bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
          rtx_insn *after;
          int i;

          next = next_active_insn (insn);
          if (!next)
            break;

          for (after = next, i = 0; i < 2; i++)
            {
              /* Skip empty assembly statements.  */
              if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
                  || (USEFUL_INSN_P (after)
                      && (asm_noperands (PATTERN (after)) >= 0)
                      && !strcmp (decode_asm_operands (PATTERN (after),
                                                       NULL, NULL, NULL,
                                                       NULL, NULL), "")))
                after = next_active_insn (after);
              if (!after)
                break;

              /* If the insn is a branch, then it cannot be problematic.  */
              if (!NONJUMP_INSN_P (after)
                  || GET_CODE (PATTERN (after)) == SEQUENCE)
                break;

              /* Sequence B is only two instructions long.  */
              if (seq_b)
                {
                  /* Add NOP if followed by a store.  */
                  if ((set = single_set (after)) != NULL_RTX
                      && MEM_P (SET_DEST (set)))
                    insert_nop = true;

                  /* Otherwise it is ok.  */
                  break;
                }

              /* If the second instruction is a load or a store,
                 then the sequence cannot be problematic.  */
              if (i == 0)
                {
                  if ((set = single_set (after)) != NULL_RTX
                      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
                    break;

                  after = next_active_insn (after);
                  if (!after)
                    break;
                }

              /* Add NOP if third instruction is a store.  */
              if (i == 1
                  && (set = single_set (after)) != NULL_RTX
                  && MEM_P (SET_DEST (set)))
                insert_nop = true;
            }
        }

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
               && NONJUMP_INSN_P (insn)
               && (set = single_set (insn)) != NULL_RTX
               && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
               && mem_ref (SET_SRC (set))
               && REG_P (SET_DEST (set))
               && REGNO (SET_DEST (set)) > 31
               && REGNO (SET_DEST (set)) % 2 != 0)
        {
          /* The wrong dependency is on the enclosing double register.  */
          const unsigned int x = REGNO (SET_DEST (set)) - 1;
          unsigned int src1, src2, dest;
          int code;

          next = next_active_insn (insn);
          if (!next)
            break;
          /* If the insn is a branch, then it cannot be problematic.  */
          if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
            continue;

          extract_insn (next);
          code = INSN_CODE (next);

          switch (code)
            {
            case CODE_FOR_adddf3:
            case CODE_FOR_subdf3:
            case CODE_FOR_muldf3:
            case CODE_FOR_divdf3:
              dest = REGNO (recog_data.operand[0]);
              src1 = REGNO (recog_data.operand[1]);
              src2 = REGNO (recog_data.operand[2]);
              if (src1 != src2)
                {
                  /* Case [1-4]:
                       ld [address], %fx+1
                       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
                  if ((src1 == x || src2 == x)
                      && (dest == src1 || dest == src2))
                    insert_nop = true;
                }
              else
                {
                  /* Case 5:
                       ld [address], %fx+1
                       FPOPd %fx, %fx, %fx  */
                  if (src1 == x
                      && dest == src1
                      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
                    insert_nop = true;
                }
              break;

            case CODE_FOR_sqrtdf2:
              dest = REGNO (recog_data.operand[0]);
              src1 = REGNO (recog_data.operand[1]);
              /* Case 6:
                   ld [address], %fx+1
                   fsqrtd %fx, %fx  */
              if (src1 == x && dest == src1)
                insert_nop = true;
              break;

            default:
              break;
            }
        }

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
               && NONJUMP_INSN_P (insn)
               && (set = single_set (insn)) != NULL_RTX
               && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
               && (mem_ref (SET_SRC (set)) != NULL_RTX
                   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
               && REG_P (SET_DEST (set))
               && REGNO (SET_DEST (set)) < 32)
        {
          /* There is no problem if the second memory access has a data
             dependency on the first single-cycle load.  */
          rtx x = SET_DEST (set);

          next = next_active_insn (insn);
          if (!next)
            break;
          /* If the insn is a branch, then it cannot be problematic.  */
          if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
            continue;

          /* Look for a second memory access to/from an integer register.  */
          if ((set = single_set (next)) != NULL_RTX)
            {
              rtx src = SET_SRC (set);
              rtx dest = SET_DEST (set);
              rtx mem;

              /* LDD is affected.  */
              if ((mem = mem_ref (src)) != NULL_RTX
                  && REG_P (dest)
                  && REGNO (dest) < 32
                  && !reg_mentioned_p (x, XEXP (mem, 0)))
                insert_nop = true;

              /* STD is *not* affected.  */
              else if (MEM_P (dest)
                       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
                       && (src == CONST0_RTX (GET_MODE (dest))
                           || (REG_P (src)
                               && REGNO (src) < 32
                               && REGNO (src) != REGNO (x)))
                       && !reg_mentioned_p (x, XEXP (dest, 0)))
                insert_nop = true;

              /* GOT accesses use LD.  */
              else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
                       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
                insert_nop = true;
            }
        }

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
               && NONJUMP_INSN_P (insn)
               && (set = single_set (insn)) != NULL_RTX
               && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
               && REG_P (SET_DEST (set))
               && REGNO (SET_DEST (set)) > 31)
        {
          /* Number of instructions in the problematic window.  */
          const int n_insns = 4;
          /* The problematic combination is with the sibling FP register.  */
          const unsigned int x = REGNO (SET_DEST (set));
          const unsigned int y = x ^ 1;
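          /* Illustrative note: hard regnos 32-63 are %f0-%f31, so if the
             load above writes %f2 (regno 34), the sibling register is
             y = 34 ^ 1 = 35, i.e. %f3.  */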
          rtx_insn *after;
          int i;

          next = next_active_insn (insn);
          if (!next)
            break;
          /* If the insn is a branch, then it cannot be problematic.  */
          if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
            continue;

          /* Look for a second load/operation into the sibling FP register.  */
          if (!((set = single_set (next)) != NULL_RTX
                && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
                && REG_P (SET_DEST (set))
                && REGNO (SET_DEST (set)) == y))
            continue;

          /* Look for a (possible) store from the FP register in the next N
             instructions, but bail out if it is again modified or if there
             is a store from the sibling FP register before this store.  */
          for (after = next, i = 0; i < n_insns; i++)
            {
              bool branch_p;

              after = next_active_insn (after);
              if (!after)
                break;

              /* This is a branch with an empty delay slot.  */
              if (!NONJUMP_INSN_P (after))
                {
                  if (++i == n_insns)
                    break;
                  branch_p = true;
                  after = NULL;
                }
              /* This is a branch with a filled delay slot.  */
              else if (rtx_sequence *seq =
                         dyn_cast <rtx_sequence *> (PATTERN (after)))
                {
                  if (++i == n_insns)
                    break;
                  branch_p = true;
                  after = seq->insn (1);
                }
              /* This is a regular instruction.  */
              else
                branch_p = false;

              if (after && (set = single_set (after)) != NULL_RTX)
                {
                  const rtx src = SET_SRC (set);
                  const rtx dest = SET_DEST (set);
                  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

                  /* If the FP register is again modified before the store,
                     then the store isn't affected.  */
                  if (REG_P (dest)
                      && (REGNO (dest) == x
                          || (REGNO (dest) == y && size == 8)))
                    break;

                  if (MEM_P (dest) && REG_P (src))
                    {
                      /* If there is a store from the sibling FP register
                         before the store, then the store is not affected.  */
                      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
                        break;

                      /* Otherwise, the store is affected.  */
                      if (REGNO (src) == x && size == 4)
                        {
                          insert_nop = true;
                          break;
                        }
                    }
                }

              /* If we have a branch in the first M instructions, then we
                 cannot see the (M+2)th instruction so we play safe.  */
              if (branch_p && i <= (n_insns - 2))
                {
                  insert_nop = true;
                  break;
                }
            }
        }

      else
        next = NEXT_INSN (insn);

      if (insert_nop)
        emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst
          || sparc_fix_gr712rc || sparc_fix_ut700 || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }

}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FSMULD)
    fprintf (stderr, "FSMULD ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_VIS4B)
    fprintf (stderr, "VIS4B ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  static struct code_model {
    const char *const name;
    const enum cmodel value;
  } const cmodels[] = {
    { "32", CM_32 },
    { "medlow", CM_MEDLOW },
    { "medmid", CM_MEDMID },
    { "medany", CM_MEDANY },
    { "embmedany", CM_EMBMEDANY },
    { NULL, (enum cmodel) 0 }
  };
  const struct code_model *cmodel;
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA|MASK_FSMULD, 0 },
    { "cypress", MASK_ISA|MASK_FSMULD, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
    { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
  };
  const struct cpu_table *cpu;
  unsigned int i;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
        {
          bool invert;
          int mask;

          p = NULL;
          if (*q == '!')
            {
              invert = true;
              q++;
            }
          else
            invert = false;

          if (! strcmp (q, "all"))
            mask = MASK_DEBUG_ALL;
          else if (! strcmp (q, "options"))
            mask = MASK_DEBUG_OPTIONS;
          else
            error ("unknown -mdebug-%s switch", q);

          if (invert)
            sparc_debug &= ~mask;
          else
            sparc_debug |= mask;
        }
    }

  /* Enable the FsMULd instruction by default if not explicitly specified by
     the user.  It may be later disabled by the CPU (explicitly or not).  */
  if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
    target_flags |= MASK_FSMULD;

  if (TARGET_DEBUG_OPTIONS)
    {
      dump_target_flags ("Initial target_flags", target_flags);
      dump_target_flags ("target_flags_explicit", target_flags_explicit);
    }

#ifdef SUBTARGET_OVERRIDE_OPTIONS
  SUBTARGET_OVERRIDE_OPTIONS;
#endif

#ifndef SPARC_BI_ARCH
  /* Check for unsupported architecture size.  */
  if (!TARGET_64BIT != DEFAULT_ARCH32_P)
    error ("%s is not supported by this configuration",
           DEFAULT_ARCH32_P ? "-m64" : "-m32");
#endif

  /* We force all 64-bit archs to use 128-bit long double.  */
  if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
    {
      error ("-mlong-double-64 not allowed with -m64");
      target_flags |= MASK_LONG_DOUBLE_128;
    }

  /* Code model selection.  */
  sparc_cmodel = SPARC_DEFAULT_CMODEL;

#ifdef SPARC_BI_ARCH
  if (TARGET_ARCH32)
    sparc_cmodel = CM_32;
#endif

  if (sparc_cmodel_string != NULL)
    {
      if (TARGET_ARCH64)
        {
          for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
            if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
              break;
          if (cmodel->name == NULL)
            error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
          else
            sparc_cmodel = cmodel->value;
        }
      else
        error ("-mcmodel= is not supported on 32-bit systems");
    }

  /* Check that -fcall-saved-REG wasn't specified for out registers.  */
  for (i = 8; i < 16; i++)
    if (!call_used_regs [i])
      {
        error ("-fcall-saved-REG is not supported for out registers");
        call_used_regs [i] = 1;
      }

  /* Set the default CPU if no -mcpu option was specified.  */
  if (!global_options_set.x_sparc_cpu_and_features)
    {
      for (def = &cpu_default[0]; def->cpu != -1; ++def)
        if (def->cpu == TARGET_CPU_DEFAULT)
          break;
      gcc_assert (def->cpu != -1);
      sparc_cpu_and_features = def->processor;
    }

  /* Set the default CPU if no -mtune option was specified.  */
  if (!global_options_set.x_sparc_cpu)
    sparc_cpu = sparc_cpu_and_features;

  cpu = &cpu_table[(int) sparc_cpu_and_features];

  if (TARGET_DEBUG_OPTIONS)
    {
      fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
      dump_target_flags ("cpu->disable", cpu->disable);
      dump_target_flags ("cpu->enable", cpu->enable);
    }

  target_flags &= ~cpu->disable;
  target_flags |= (cpu->enable
#ifndef HAVE_AS_FMAF_HPC_VIS3
                   & ~(MASK_FMAF | MASK_VIS3)
#endif
#ifndef HAVE_AS_SPARC4
                   & ~MASK_CBCOND
#endif
#ifndef HAVE_AS_SPARC5_VIS4
                   & ~(MASK_VIS4 | MASK_SUBXC)
#endif
#ifndef HAVE_AS_SPARC6
                   & ~(MASK_VIS4B)
#endif
#ifndef HAVE_AS_LEON
                   & ~(MASK_LEON | MASK_LEON3)
#endif
                   & ~(target_flags_explicit & MASK_FEATURES)
                   );

  /* -mvis2 implies -mvis.  */
  if (TARGET_VIS2)
    target_flags |= MASK_VIS;

  /* -mvis3 implies -mvis2 and -mvis.  */
  if (TARGET_VIS3)
    target_flags |= MASK_VIS2 | MASK_VIS;

  /* -mvis4 implies -mvis3, -mvis2 and -mvis.  */
  if (TARGET_VIS4)
    target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis.  */
  if (TARGET_VIS4B)
    target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;

  /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld
     if the FPU is disabled.  */
  if (!TARGET_FPU)
    target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
                      | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);

  /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
     are available; -m64 also implies v9.  */
  if (TARGET_VIS || TARGET_ARCH64)
    {
      target_flags |= MASK_V9;
      target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
    }

  /* -mvis also implies -mv8plus on 32-bit.  */
  if (TARGET_VIS && !TARGET_ARCH64)
    target_flags |= MASK_V8PLUS;

  /* Use the deprecated v8 insns for sparc64 in 32-bit mode.  */
  if (TARGET_V9 && TARGET_ARCH32)
    target_flags |= MASK_DEPRECATED_V8_INSNS;

  /* V8PLUS requires V9 and makes no sense in 64-bit mode.  */
  if (!TARGET_V9 || TARGET_ARCH64)
    target_flags &= ~MASK_V8PLUS;

  /* Don't use stack biasing in 32-bit mode.  */
  if (TARGET_ARCH32)
    target_flags &= ~MASK_STACK_BIAS;

  /* Use LRA instead of reload, unless otherwise instructed.  */
  if (!(target_flags_explicit & MASK_LRA))
    target_flags |= MASK_LRA;

  /* Enable applicable errata workarounds for LEON3FT.  */
  if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
    {
      sparc_fix_b2bst = 1;
      sparc_fix_lost_divsqrt = 1;
    }

  /* Disable FsMULd for the UT699 since it doesn't work correctly.  */
1888 if (sparc_fix_ut699)
1889 target_flags &= ~MASK_FSMULD;
1891 /* Supply a default value for align_functions. */
1892 if (align_functions == 0)
1894 if (sparc_cpu == PROCESSOR_ULTRASPARC
1895 || sparc_cpu == PROCESSOR_ULTRASPARC3
1896 || sparc_cpu == PROCESSOR_NIAGARA
1897 || sparc_cpu == PROCESSOR_NIAGARA2
1898 || sparc_cpu == PROCESSOR_NIAGARA3
1899 || sparc_cpu == PROCESSOR_NIAGARA4)
1900 align_functions = 32;
1901 else if (sparc_cpu == PROCESSOR_NIAGARA7
1902 || sparc_cpu == PROCESSOR_M8)
1903 align_functions = 64;
1906 /* Validate PCC_STRUCT_RETURN. */
1907 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1908 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1910 /* Only use .uaxword when compiling for a 64-bit target. */
1911 if (!TARGET_ARCH64)
1912 targetm.asm_out.unaligned_op.di = NULL;
1914 /* Do various machine dependent initializations. */
1915 sparc_init_modes ();
1917 /* Set up function hooks. */
1918 init_machine_status = sparc_init_machine_status;
1920 switch (sparc_cpu)
1922 case PROCESSOR_V7:
1923 case PROCESSOR_CYPRESS:
1924 sparc_costs = &cypress_costs;
1925 break;
1926 case PROCESSOR_V8:
1927 case PROCESSOR_SPARCLITE:
1928 case PROCESSOR_SUPERSPARC:
1929 sparc_costs = &supersparc_costs;
1930 break;
1931 case PROCESSOR_F930:
1932 case PROCESSOR_F934:
1933 case PROCESSOR_HYPERSPARC:
1934 case PROCESSOR_SPARCLITE86X:
1935 sparc_costs = &hypersparc_costs;
1936 break;
1937 case PROCESSOR_LEON:
1938 sparc_costs = &leon_costs;
1939 break;
1940 case PROCESSOR_LEON3:
1941 case PROCESSOR_LEON3V7:
1942 sparc_costs = &leon3_costs;
1943 break;
1944 case PROCESSOR_SPARCLET:
1945 case PROCESSOR_TSC701:
1946 sparc_costs = &sparclet_costs;
1947 break;
1948 case PROCESSOR_V9:
1949 case PROCESSOR_ULTRASPARC:
1950 sparc_costs = &ultrasparc_costs;
1951 break;
1952 case PROCESSOR_ULTRASPARC3:
1953 sparc_costs = &ultrasparc3_costs;
1954 break;
1955 case PROCESSOR_NIAGARA:
1956 sparc_costs = &niagara_costs;
1957 break;
1958 case PROCESSOR_NIAGARA2:
1959 sparc_costs = &niagara2_costs;
1960 break;
1961 case PROCESSOR_NIAGARA3:
1962 sparc_costs = &niagara3_costs;
1963 break;
1964 case PROCESSOR_NIAGARA4:
1965 sparc_costs = &niagara4_costs;
1966 break;
1967 case PROCESSOR_NIAGARA7:
1968 sparc_costs = &niagara7_costs;
1969 break;
1970 case PROCESSOR_M8:
1971 sparc_costs = &m8_costs;
1972 break;
1973 case PROCESSOR_NATIVE:
1974 gcc_unreachable ();
1977 if (sparc_memory_model == SMM_DEFAULT)
1979 /* Choose the memory model for the operating system. */
1980 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1981 if (os_default != SMM_DEFAULT)
1982 sparc_memory_model = os_default;
1983 /* Choose the most relaxed model for the processor. */
1984 else if (TARGET_V9)
1985 sparc_memory_model = SMM_RMO;
1986 else if (TARGET_LEON3)
1987 sparc_memory_model = SMM_TSO;
1988 else if (TARGET_LEON)
1989 sparc_memory_model = SMM_SC;
1990 else if (TARGET_V8)
1991 sparc_memory_model = SMM_PSO;
1992 else
1993 sparc_memory_model = SMM_SC;
1996 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1997 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1998 target_flags |= MASK_LONG_DOUBLE_128;
1999 #endif
2001 if (TARGET_DEBUG_OPTIONS)
2002 dump_target_flags ("Final target_flags", target_flags);
2004 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
2005 can run at the same time. More important, it is the threshold
2006 defining when additional prefetches will be dropped by the
2007 hardware.
2009 The UltraSPARC-III features a documented prefetch queue with a
2010 size of 8. Additional prefetches issued in the cpu are
2011 dropped.
2013 Niagara processors are different. In these processors prefetches
2014 are handled much like regular loads. The L1 miss buffer is 32
2015 entries, but prefetches start getting affected when 30 entries
2016 become occupied. That occupation could be a mix of regular loads
2017 and prefetches though. And that buffer is shared by all threads.
2018 Once the threshold is reached, if the core is running a single
2019 thread the prefetch will retry. If more than one thread is
2020 running, the prefetch will be dropped.
2022 All this makes it very difficult to determine how many
2023      prefetches can be issued simultaneously, even in a
2024 single-threaded program. Experimental results show that setting
2025 this parameter to 32 works well when the number of threads is not
2026 high. */
2027 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
2028 ((sparc_cpu == PROCESSOR_ULTRASPARC
2029 || sparc_cpu == PROCESSOR_NIAGARA
2030 || sparc_cpu == PROCESSOR_NIAGARA2
2031 || sparc_cpu == PROCESSOR_NIAGARA3
2032 || sparc_cpu == PROCESSOR_NIAGARA4)
2033                           ? 2
2034                           : (sparc_cpu == PROCESSOR_ULTRASPARC3
2035 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2036 || sparc_cpu == PROCESSOR_M8)
2037 ? 32 : 3))),
2038 global_options.x_param_values,
2039 global_options_set.x_param_values);
2041 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
2042 bytes.
2044 The Oracle SPARC Architecture (previously the UltraSPARC
2045 Architecture) specification states that when a PREFETCH[A]
2046 instruction is executed an implementation-specific amount of data
2047 is prefetched, and that it is at least 64 bytes long (aligned to
2048 at least 64 bytes).
2050 However, this is not correct. The M7 (and implementations prior
2051 to that) does not guarantee a 64B prefetch into a cache if the
2052 line size is smaller. A single cache line is all that is ever
2053 prefetched. So for the M7, where the L1D$ has 32B lines and the
2054 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2055 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2056 is a read_n prefetch, which is the only type which allocates to
2057 the L1.) */
2058 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
2059 (sparc_cpu == PROCESSOR_M8
2060 ? 64 : 32),
2061 global_options.x_param_values,
2062 global_options_set.x_param_values);
2064 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
2065      Harvard level-1 caches) in kilobytes.  Both UltraSPARC and
2066      Niagara processors feature an L1D$ of 16KB.  */
2067 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
2068 ((sparc_cpu == PROCESSOR_ULTRASPARC
2069 || sparc_cpu == PROCESSOR_ULTRASPARC3
2070 || sparc_cpu == PROCESSOR_NIAGARA
2071 || sparc_cpu == PROCESSOR_NIAGARA2
2072 || sparc_cpu == PROCESSOR_NIAGARA3
2073 || sparc_cpu == PROCESSOR_NIAGARA4
2074 || sparc_cpu == PROCESSOR_NIAGARA7
2075 || sparc_cpu == PROCESSOR_M8)
2076 ? 16 : 64),
2077 global_options.x_param_values,
2078 global_options_set.x_param_values);
2081   /* PARAM_L2_CACHE_SIZE is the size of the L2 in kilobytes.  Note
2082 that 512 is the default in params.def. */
2083 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
2084 ((sparc_cpu == PROCESSOR_NIAGARA4
2085 || sparc_cpu == PROCESSOR_M8)
2086 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2087 ? 256 : 512)),
2088 global_options.x_param_values,
2089 global_options_set.x_param_values);
2092 /* Disable save slot sharing for call-clobbered registers by default.
2093 The IRA sharing algorithm works on single registers only and this
2094 pessimizes for double floating-point registers. */
2095 if (!global_options_set.x_flag_ira_share_save_slots)
2096 flag_ira_share_save_slots = 0;
2098 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2099 redundant 32-to-64-bit extensions. */
2100 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2101 flag_ree = 0;
2104 /* Miscellaneous utilities. */
2106 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2107 or branch on register contents instructions. */
2110 v9_regcmp_p (enum rtx_code code)
2112 return (code == EQ || code == NE || code == GE || code == LT
2113 || code == LE || code == GT);
2116 /* Nonzero if OP is a floating point constant which can
2117 be loaded into an integer register using a single
2118 sethi instruction. */
2121 fp_sethi_p (rtx op)
2123 if (GET_CODE (op) == CONST_DOUBLE)
2125 long i;
2127 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2128 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2131 return 0;
2134 /* Nonzero if OP is a floating point constant which can
2135 be loaded into an integer register using a single
2136 mov instruction. */
2139 fp_mov_p (rtx op)
2141 if (GET_CODE (op) == CONST_DOUBLE)
2143 long i;
2145 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2146 return SPARC_SIMM13_P (i);
2149 return 0;
2152 /* Nonzero if OP is a floating point constant which can
2153 be loaded into an integer register using a high/losum
2154 instruction sequence. */
2157 fp_high_losum_p (rtx op)
2159 /* The constraints calling this should only be in
2160 SFmode move insns, so any constant which cannot
2161 be moved using a single insn will do. */
2162 if (GET_CODE (op) == CONST_DOUBLE)
2164 long i;
2166 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2167 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2170 return 0;
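
/* Roughly speaking (see sparc.h for the authoritative definitions):
   SPARC_SIMM13_P accepts 13-bit signed immediates, i.e. [-4096, 4095],
   while SPARC_SETHI_P accepts 32-bit values whose low 10 bits are clear,
   i.e. anything loadable by a single sethi.  */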
2173 /* Return true if the address of LABEL can be loaded by means of the
2174 mov{si,di}_pic_label_ref patterns in PIC mode. */
2176 static bool
2177 can_use_mov_pic_label_ref (rtx label)
2179 /* VxWorks does not impose a fixed gap between segments; the run-time
2180 gap can be different from the object-file gap. We therefore can't
2181 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2182 are absolutely sure that X is in the same segment as the GOT.
2183 Unfortunately, the flexibility of linker scripts means that we
2184 can't be sure of that in general, so assume that GOT-relative
2185 accesses are never valid on VxWorks. */
2186 if (TARGET_VXWORKS_RTP)
2187 return false;
2189 /* Similarly, if the label is non-local, it might end up being placed
2190      in a different section than the current one, and mov_pic_label_ref
2191 requires the label and the code to be in the same section. */
2192 if (LABEL_REF_NONLOCAL_P (label))
2193 return false;
2195   /* Finally, if we are reordering basic blocks and partitioning into hot
2196 and cold sections, this might happen for any label. */
2197 if (flag_reorder_blocks_and_partition)
2198 return false;
2200 return true;
2203 /* Expand a move instruction. Return true if all work is done. */
2205 bool
2206 sparc_expand_move (machine_mode mode, rtx *operands)
2208 /* Handle sets of MEM first. */
2209 if (GET_CODE (operands[0]) == MEM)
2211 /* 0 is a register (or a pair of registers) on SPARC. */
2212 if (register_or_zero_operand (operands[1], mode))
2213 return false;
2215 if (!reload_in_progress)
2217 operands[0] = validize_mem (operands[0]);
2218 operands[1] = force_reg (mode, operands[1]);
2222 /* Fixup TLS cases. */
2223 if (TARGET_HAVE_TLS
2224 && CONSTANT_P (operands[1])
2225 && sparc_tls_referenced_p (operands [1]))
2227 operands[1] = sparc_legitimize_tls_address (operands[1]);
2228 return false;
2231 /* Fixup PIC cases. */
2232 if (flag_pic && CONSTANT_P (operands[1]))
2234 if (pic_address_needs_scratch (operands[1]))
2235 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2237 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2238 if (GET_CODE (operands[1]) == LABEL_REF
2239 && can_use_mov_pic_label_ref (operands[1]))
2241 if (mode == SImode)
2243 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2244 return true;
2247 if (mode == DImode)
2249 gcc_assert (TARGET_ARCH64);
2250 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2251 return true;
2255 if (symbolic_operand (operands[1], mode))
2257 operands[1]
2258 = sparc_legitimize_pic_address (operands[1],
2259 reload_in_progress
2260 ? operands[0] : NULL_RTX);
2261 return false;
2265 /* If we are trying to toss an integer constant into FP registers,
2266 or loading a FP or vector constant, force it into memory. */
2267 if (CONSTANT_P (operands[1])
2268 && REG_P (operands[0])
2269 && (SPARC_FP_REG_P (REGNO (operands[0]))
2270 || SCALAR_FLOAT_MODE_P (mode)
2271 || VECTOR_MODE_P (mode)))
2273 /* emit_group_store will send such bogosity to us when it is
2274 not storing directly into memory. So fix this up to avoid
2275 crashes in output_constant_pool. */
2276 if (operands [1] == const0_rtx)
2277 operands[1] = CONST0_RTX (mode);
2279 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2280 always other regs. */
2281 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2282 && (const_zero_operand (operands[1], mode)
2283 || const_all_ones_operand (operands[1], mode)))
2284 return false;
2286 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2287 /* We are able to build any SF constant in integer registers
2288 with at most 2 instructions. */
2289 && (mode == SFmode
2290 /* And any DF constant in integer registers if needed. */
2291 || (mode == DFmode && !can_create_pseudo_p ())))
2292 return false;
2294 operands[1] = force_const_mem (mode, operands[1]);
2295 if (!reload_in_progress)
2296 operands[1] = validize_mem (operands[1]);
2297 return false;
2300 /* Accept non-constants and valid constants unmodified. */
2301 if (!CONSTANT_P (operands[1])
2302 || GET_CODE (operands[1]) == HIGH
2303 || input_operand (operands[1], mode))
2304 return false;
2306 switch (mode)
2308 case E_QImode:
2309 /* All QImode constants require only one insn, so proceed. */
2310 break;
2312 case E_HImode:
2313 case E_SImode:
2314 sparc_emit_set_const32 (operands[0], operands[1]);
2315 return true;
2317 case E_DImode:
2318 /* input_operand should have filtered out 32-bit mode. */
2319 sparc_emit_set_const64 (operands[0], operands[1]);
2320 return true;
2322 case E_TImode:
2324 rtx high, low;
2325 /* TImode isn't available in 32-bit mode. */
2326 split_double (operands[1], &high, &low);
2327 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2328 high));
2329 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2330 low));
2332 return true;
2334 default:
2335 gcc_unreachable ();
2338 return false;
2341 /* Load OP1, a 32-bit constant, into OP0, a register.
2342    We know it can't be done in one insn when we get
2343    here; the move expander guarantees this.  */
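
/* As a worked illustration (the constant is arbitrary, not from this
   file): loading 0x12345678 conceptually expands to

     sethi %hi(0x12345678), %temp   ! temp = 0x12345678 & ~0x3ff = 0x12345400
     or    %temp, 0x278, %op0       ! 0x12345678 & 0x3ff = 0x278

   i.e. the top 22 bits via sethi and the low 10 bits via or.  */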
2345 static void
2346 sparc_emit_set_const32 (rtx op0, rtx op1)
2348 machine_mode mode = GET_MODE (op0);
2349 rtx temp = op0;
2351 if (can_create_pseudo_p ())
2352 temp = gen_reg_rtx (mode);
2354 if (GET_CODE (op1) == CONST_INT)
2356 gcc_assert (!small_int_operand (op1, mode)
2357 && !const_high_operand (op1, mode));
2359 /* Emit them as real moves instead of a HIGH/LO_SUM,
2360 this way CSE can see everything and reuse intermediate
2361 values if it wants. */
2362 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2363 & ~(HOST_WIDE_INT) 0x3ff)));
2365 emit_insn (gen_rtx_SET (op0,
2366 gen_rtx_IOR (mode, temp,
2367 GEN_INT (INTVAL (op1) & 0x3ff))));
2369 else
2371 /* A symbol, emit in the traditional way. */
2372 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2373 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2377 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2378 If TEMP is nonzero, we are forbidden to use any other scratch
2379 registers. Otherwise, we are allowed to generate them as needed.
2381 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2382 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2384 void
2385 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2387 rtx cst, temp1, temp2, temp3, temp4, temp5;
2388 rtx ti_temp = 0;
2390 /* Deal with too large offsets. */
2391 if (GET_CODE (op1) == CONST
2392 && GET_CODE (XEXP (op1, 0)) == PLUS
2393 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2394 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2396 gcc_assert (!temp);
2397 temp1 = gen_reg_rtx (DImode);
2398 temp2 = gen_reg_rtx (DImode);
2399 sparc_emit_set_const64 (temp2, cst);
2400 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2401 NULL_RTX);
2402 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2403 return;
2406 if (temp && GET_MODE (temp) == TImode)
2408 ti_temp = temp;
2409 temp = gen_rtx_REG (DImode, REGNO (temp));
2412 /* SPARC-V9 code-model support. */
2413 switch (sparc_cmodel)
2415 case CM_MEDLOW:
2416 /* The range spanned by all instructions in the object is less
2417 than 2^31 bytes (2GB) and the distance from any instruction
2418 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2419 than 2^31 bytes (2GB).
2421 The executable must be in the low 4TB of the virtual address
2422 space.
2424 sethi %hi(symbol), %temp1
2425 or %temp1, %lo(symbol), %reg */
2426 if (temp)
2427 temp1 = temp; /* op0 is allowed. */
2428 else
2429 temp1 = gen_reg_rtx (DImode);
2431 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2432 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2433 break;
2435 case CM_MEDMID:
2436 /* The range spanned by all instructions in the object is less
2437 than 2^31 bytes (2GB) and the distance from any instruction
2438 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2439 than 2^31 bytes (2GB).
2441 The executable must be in the low 16TB of the virtual address
2442 space.
2444 sethi %h44(symbol), %temp1
2445 or %temp1, %m44(symbol), %temp2
2446 sllx %temp2, 12, %temp3
2447 or %temp3, %l44(symbol), %reg */
2448 if (temp)
2450 temp1 = op0;
2451 temp2 = op0;
2452 temp3 = temp; /* op0 is allowed. */
2454 else
2456 temp1 = gen_reg_rtx (DImode);
2457 temp2 = gen_reg_rtx (DImode);
2458 temp3 = gen_reg_rtx (DImode);
2461 emit_insn (gen_seth44 (temp1, op1));
2462 emit_insn (gen_setm44 (temp2, temp1, op1));
2463 emit_insn (gen_rtx_SET (temp3,
2464 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2465 emit_insn (gen_setl44 (op0, temp3, op1));
2466 break;
2468 case CM_MEDANY:
2469 /* The range spanned by all instructions in the object is less
2470 than 2^31 bytes (2GB) and the distance from any instruction
2471 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2472 than 2^31 bytes (2GB).
2474 The executable can be placed anywhere in the virtual address
2475 space.
2477 sethi %hh(symbol), %temp1
2478 sethi %lm(symbol), %temp2
2479 or %temp1, %hm(symbol), %temp3
2480 sllx %temp3, 32, %temp4
2481 or %temp4, %temp2, %temp5
2482 or %temp5, %lo(symbol), %reg */
2483 if (temp)
2485 /* It is possible that one of the registers we got for operands[2]
2486 might coincide with that of operands[0] (which is why we made
2487 it TImode). Pick the other one to use as our scratch. */
2488 if (rtx_equal_p (temp, op0))
2490 gcc_assert (ti_temp);
2491 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2493 temp1 = op0;
2494 temp2 = temp; /* op0 is _not_ allowed, see above. */
2495 temp3 = op0;
2496 temp4 = op0;
2497 temp5 = op0;
2499 else
2501 temp1 = gen_reg_rtx (DImode);
2502 temp2 = gen_reg_rtx (DImode);
2503 temp3 = gen_reg_rtx (DImode);
2504 temp4 = gen_reg_rtx (DImode);
2505 temp5 = gen_reg_rtx (DImode);
2508 emit_insn (gen_sethh (temp1, op1));
2509 emit_insn (gen_setlm (temp2, op1));
2510 emit_insn (gen_sethm (temp3, temp1, op1));
2511 emit_insn (gen_rtx_SET (temp4,
2512 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2513 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2514 emit_insn (gen_setlo (op0, temp5, op1));
2515 break;
2517 case CM_EMBMEDANY:
2518       /* Old old old backwards-compatibility cruft here.
2519 Essentially it is MEDLOW with a fixed 64-bit
2520 virtual base added to all data segment addresses.
2521 Text-segment stuff is computed like MEDANY, we can't
2522 reuse the code above because the relocation knobs
2523 look different.
2525 Data segment: sethi %hi(symbol), %temp1
2526 add %temp1, EMBMEDANY_BASE_REG, %temp2
2527 or %temp2, %lo(symbol), %reg */
2528 if (data_segment_operand (op1, GET_MODE (op1)))
2530 if (temp)
2532 temp1 = temp; /* op0 is allowed. */
2533 temp2 = op0;
2535 else
2537 temp1 = gen_reg_rtx (DImode);
2538 temp2 = gen_reg_rtx (DImode);
2541 emit_insn (gen_embmedany_sethi (temp1, op1));
2542 emit_insn (gen_embmedany_brsum (temp2, temp1));
2543 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2546 /* Text segment: sethi %uhi(symbol), %temp1
2547 sethi %hi(symbol), %temp2
2548 or %temp1, %ulo(symbol), %temp3
2549 sllx %temp3, 32, %temp4
2550 or %temp4, %temp2, %temp5
2551 or %temp5, %lo(symbol), %reg */
2552 else
2554 if (temp)
2556 /* It is possible that one of the registers we got for operands[2]
2557 might coincide with that of operands[0] (which is why we made
2558 it TImode). Pick the other one to use as our scratch. */
2559 if (rtx_equal_p (temp, op0))
2561 gcc_assert (ti_temp);
2562 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2564 temp1 = op0;
2565 temp2 = temp; /* op0 is _not_ allowed, see above. */
2566 temp3 = op0;
2567 temp4 = op0;
2568 temp5 = op0;
2570 else
2572 temp1 = gen_reg_rtx (DImode);
2573 temp2 = gen_reg_rtx (DImode);
2574 temp3 = gen_reg_rtx (DImode);
2575 temp4 = gen_reg_rtx (DImode);
2576 temp5 = gen_reg_rtx (DImode);
2579 emit_insn (gen_embmedany_textuhi (temp1, op1));
2580 emit_insn (gen_embmedany_texthi (temp2, op1));
2581 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2582 emit_insn (gen_rtx_SET (temp4,
2583 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2584 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2585 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2587 break;
2589 default:
2590 gcc_unreachable ();
2594 /* These avoid problems when cross compiling. If we do not
2595 go through all this hair then the optimizer will see
2596 invalid REG_EQUAL notes or in some cases none at all. */
2597 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2598 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2599 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2600 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2602 /* The optimizer is not to assume anything about exactly
2603    which bits are set for a HIGH; they are unspecified.
2604    Unfortunately this leads to many missed optimizations
2605    during CSE.  We mask out the non-HIGH bits and match
2606    a plain movdi to alleviate this problem.  */
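
/* For instance (value assumed for illustration): with VAL == 0x12345678,
   gen_safe_HIGH64 emits a SET of GEN_INT (0x12345400), i.e. VAL with its
   low 10 bits cleared, and a later gen_safe_OR64 supplies the remaining
   0x278.  */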
2607 static rtx
2608 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2610 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2613 static rtx
2614 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2616 return gen_rtx_SET (dest, GEN_INT (val));
2619 static rtx
2620 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2622 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2625 static rtx
2626 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2628 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2631 /* Worker routines for 64-bit constant formation on arch64.
2632 One of the key things to be doing in these emissions is
2633 to create as many temp REGs as possible. This makes it
2634 possible for half-built constants to be used later when
2635 such values are similar to something required later on.
2636 Without doing this, the optimizer cannot see such
2637 opportunities. */
2639 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2640 unsigned HOST_WIDE_INT, int);
2642 static void
2643 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2644 unsigned HOST_WIDE_INT low_bits, int is_neg)
2646 unsigned HOST_WIDE_INT high_bits;
2648 if (is_neg)
2649 high_bits = (~low_bits) & 0xffffffff;
2650 else
2651 high_bits = low_bits;
2653 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2654 if (!is_neg)
2656 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2658 else
2660 /* If we are XOR'ing with -1, then we should emit a one's complement
2661 instead. This way the combiner will notice logical operations
2662 such as ANDN later on and substitute. */
2663 if ((low_bits & 0x3ff) == 0x3ff)
2665 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2667 else
2669 emit_insn (gen_rtx_SET (op0,
2670 gen_safe_XOR64 (temp,
2671 (-(HOST_WIDE_INT)0x400
2672 | (low_bits & 0x3ff)))));
2677 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2678 unsigned HOST_WIDE_INT, int);
2680 static void
2681 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2682 unsigned HOST_WIDE_INT high_bits,
2683 unsigned HOST_WIDE_INT low_immediate,
2684 int shift_count)
2686 rtx temp2 = op0;
2688 if ((high_bits & 0xfffffc00) != 0)
2690 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2691 if ((high_bits & ~0xfffffc00) != 0)
2692 emit_insn (gen_rtx_SET (op0,
2693 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2694 else
2695 temp2 = temp;
2697 else
2699 emit_insn (gen_safe_SET64 (temp, high_bits));
2700 temp2 = temp;
2703 /* Now shift it up into place. */
2704 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2705 GEN_INT (shift_count))));
2707 /* If there is a low immediate part piece, finish up by
2708 putting that in as well. */
2709 if (low_immediate != 0)
2710 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
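
/* E.g. (values assumed for illustration): quick2 with
   high_bits == 0x12345678, low_immediate == 0x5a and shift_count == 32
   emits "sethi %hi(0x12345678); or 0x278; sllx 32; or 0x5a" to build
   0x123456780000005a.  */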
2713 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2714 unsigned HOST_WIDE_INT);
2716 /* Full 64-bit constant decomposition. Even though this is the
2717 'worst' case, we still optimize a few things away. */
2718 static void
2719 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2720 unsigned HOST_WIDE_INT high_bits,
2721 unsigned HOST_WIDE_INT low_bits)
2723 rtx sub_temp = op0;
2725 if (can_create_pseudo_p ())
2726 sub_temp = gen_reg_rtx (DImode);
2728 if ((high_bits & 0xfffffc00) != 0)
2730 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2731 if ((high_bits & ~0xfffffc00) != 0)
2732 emit_insn (gen_rtx_SET (sub_temp,
2733 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2734 else
2735 sub_temp = temp;
2737 else
2739 emit_insn (gen_safe_SET64 (temp, high_bits));
2740 sub_temp = temp;
2743 if (can_create_pseudo_p ())
2745 rtx temp2 = gen_reg_rtx (DImode);
2746 rtx temp3 = gen_reg_rtx (DImode);
2747 rtx temp4 = gen_reg_rtx (DImode);
2749 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2750 GEN_INT (32))));
2752 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2753 if ((low_bits & ~0xfffffc00) != 0)
2755 emit_insn (gen_rtx_SET (temp3,
2756 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2757 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2759 else
2761 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2764 else
2766 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2767 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2768 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2769 int to_shift = 12;
2771 /* We are in the middle of reload, so this is really
2772 painful. However we do still make an attempt to
2773 avoid emitting truly stupid code. */
2774 if (low1 != const0_rtx)
2776 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2777 GEN_INT (to_shift))));
2778 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2779 sub_temp = op0;
2780 to_shift = 12;
2782 else
2784 to_shift += 12;
2786 if (low2 != const0_rtx)
2788 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2789 GEN_INT (to_shift))));
2790 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2791 sub_temp = op0;
2792 to_shift = 8;
2794 else
2796 to_shift += 8;
2798 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2799 GEN_INT (to_shift))));
2800 if (low3 != const0_rtx)
2801 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2802 /* phew... */
2806 /* Analyze a 64-bit constant for certain properties. */
2807 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2808 unsigned HOST_WIDE_INT,
2809 int *, int *, int *);
2811 static void
2812 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2813 unsigned HOST_WIDE_INT low_bits,
2814 int *hbsp, int *lbsp, int *abbasp)
2816 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2817 int i;
2819 lowest_bit_set = highest_bit_set = -1;
2820 i = 0;
2823 if ((lowest_bit_set == -1)
2824 && ((low_bits >> i) & 1))
2825 lowest_bit_set = i;
2826 if ((highest_bit_set == -1)
2827 && ((high_bits >> (32 - i - 1)) & 1))
2828 highest_bit_set = (64 - i - 1);
2830 while (++i < 32
2831 && ((highest_bit_set == -1)
2832 || (lowest_bit_set == -1)));
2833 if (i == 32)
2835 i = 0;
2838 if ((lowest_bit_set == -1)
2839 && ((high_bits >> i) & 1))
2840 lowest_bit_set = i + 32;
2841 if ((highest_bit_set == -1)
2842 && ((low_bits >> (32 - i - 1)) & 1))
2843 highest_bit_set = 32 - i - 1;
2845 while (++i < 32
2846 && ((highest_bit_set == -1)
2847 || (lowest_bit_set == -1)));
2849 /* If there are no bits set this should have gone out
2850 as one instruction! */
2851 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2852 all_bits_between_are_set = 1;
2853 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2855 if (i < 32)
2857 if ((low_bits & (1 << i)) != 0)
2858 continue;
2860 else
2862 if ((high_bits & (1 << (i - 32))) != 0)
2863 continue;
2865 all_bits_between_are_set = 0;
2866 break;
2868 *hbsp = highest_bit_set;
2869 *lbsp = lowest_bit_set;
2870 *abbasp = all_bits_between_are_set;
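
/* A worked example (constant chosen for illustration): for the value
   0x00000003fffffc00, high_bits == 0x3 and low_bits == 0xfffffc00, so
   analyze_64bit_constant reports lowest_bit_set == 10,
   highest_bit_set == 33 and all_bits_between_are_set == 1.  */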
2873 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2875 static int
2876 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2877 unsigned HOST_WIDE_INT low_bits)
2879 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2881 if (high_bits == 0
2882 || high_bits == 0xffffffff)
2883 return 1;
2885 analyze_64bit_constant (high_bits, low_bits,
2886 &highest_bit_set, &lowest_bit_set,
2887 &all_bits_between_are_set);
2889 if ((highest_bit_set == 63
2890 || lowest_bit_set == 0)
2891 && all_bits_between_are_set != 0)
2892 return 1;
2894 if ((highest_bit_set - lowest_bit_set) < 21)
2895 return 1;
2897 return 0;
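
/* E.g. (illustrative): 0x0000000ff0000000 has bits 28..35 set, so
   highest_bit_set - lowest_bit_set == 7 < 21 and the constant is
   loadable in 2 insns (a sethi of the focused bits plus a shift).  */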
2900 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2901 unsigned HOST_WIDE_INT,
2902 int, int);
2904 static unsigned HOST_WIDE_INT
2905 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2906 unsigned HOST_WIDE_INT low_bits,
2907 int lowest_bit_set, int shift)
2909 HOST_WIDE_INT hi, lo;
2911 if (lowest_bit_set < 32)
2913 lo = (low_bits >> lowest_bit_set) << shift;
2914 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2916 else
2918 lo = 0;
2919 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2921 gcc_assert (! (hi & lo));
2922 return (hi | lo);
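
/* Continuing the example above (values assumed for illustration):
   create_simple_focus_bits (0xf, 0xf0000000, 28, 10) yields
   lo == 0x3c00 and hi == 0x3c000, i.e. 0x3fc00 -- a valid sethi
   operand which, shifted left by lowest_bit_set - 10 == 18,
   reconstitutes 0xff0000000.  */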
2925 /* Here we are sure to be arch64 and this is an integer constant
2926 being loaded into a register. Emit the most efficient
2927 insn sequence possible. Detection of all the 1-insn cases
2928 has been done already. */
2929 static void
2930 sparc_emit_set_const64 (rtx op0, rtx op1)
2932 unsigned HOST_WIDE_INT high_bits, low_bits;
2933 int lowest_bit_set, highest_bit_set;
2934 int all_bits_between_are_set;
2935 rtx temp = 0;
2937 /* Sanity check that we know what we are working with. */
2938 gcc_assert (TARGET_ARCH64
2939 && (GET_CODE (op0) == SUBREG
2940 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2942 if (! can_create_pseudo_p ())
2943 temp = op0;
2945 if (GET_CODE (op1) != CONST_INT)
2947 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2948 return;
2951 if (! temp)
2952 temp = gen_reg_rtx (DImode);
2954 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2955 low_bits = (INTVAL (op1) & 0xffffffff);
2957 /* low_bits bits 0 --> 31
2958 high_bits bits 32 --> 63 */
2960 analyze_64bit_constant (high_bits, low_bits,
2961 &highest_bit_set, &lowest_bit_set,
2962 &all_bits_between_are_set);
2964 /* First try for a 2-insn sequence. */
2966 /* These situations are preferred because the optimizer can
2967 * do more things with them:
2968 * 1) mov -1, %reg
2969 * sllx %reg, shift, %reg
2970 * 2) mov -1, %reg
2971 * srlx %reg, shift, %reg
2972 * 3) mov some_small_const, %reg
2973 * sllx %reg, shift, %reg
2975 if (((highest_bit_set == 63
2976 || lowest_bit_set == 0)
2977 && all_bits_between_are_set != 0)
2978 || ((highest_bit_set - lowest_bit_set) < 12))
2980 HOST_WIDE_INT the_const = -1;
2981 int shift = lowest_bit_set;
2983 if ((highest_bit_set != 63
2984 && lowest_bit_set != 0)
2985 || all_bits_between_are_set == 0)
2987 the_const =
2988 create_simple_focus_bits (high_bits, low_bits,
2989 lowest_bit_set, 0);
2991 else if (lowest_bit_set == 0)
2992 shift = -(63 - highest_bit_set);
2994 gcc_assert (SPARC_SIMM13_P (the_const));
2995 gcc_assert (shift != 0);
2997 emit_insn (gen_safe_SET64 (temp, the_const));
2998 if (shift > 0)
2999 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3000 GEN_INT (shift))));
3001 else if (shift < 0)
3002 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3003 GEN_INT (-shift))));
3004 return;
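
/* E.g. (constant assumed for illustration): 0xfffffffffff00000 has
   bits 20..63 set, so the_const stays -1 and shift == 20, giving
   "mov -1, %reg; sllx %reg, 20, %reg".  */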
3007   /* Now a range of 22 or fewer bits set somewhere.
3008 * 1) sethi %hi(focus_bits), %reg
3009 * sllx %reg, shift, %reg
3010 * 2) sethi %hi(focus_bits), %reg
3011 * srlx %reg, shift, %reg
3013 if ((highest_bit_set - lowest_bit_set) < 21)
3015 unsigned HOST_WIDE_INT focus_bits =
3016 create_simple_focus_bits (high_bits, low_bits,
3017 lowest_bit_set, 10);
3019 gcc_assert (SPARC_SETHI_P (focus_bits));
3020 gcc_assert (lowest_bit_set != 10);
3022 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3024 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3025 if (lowest_bit_set < 10)
3026 emit_insn (gen_rtx_SET (op0,
3027 gen_rtx_LSHIFTRT (DImode, temp,
3028 GEN_INT (10 - lowest_bit_set))));
3029 else if (lowest_bit_set > 10)
3030 emit_insn (gen_rtx_SET (op0,
3031 gen_rtx_ASHIFT (DImode, temp,
3032 GEN_INT (lowest_bit_set - 10))));
3033 return;
3036 /* 1) sethi %hi(low_bits), %reg
3037 * or %reg, %lo(low_bits), %reg
3038 * 2) sethi %hi(~low_bits), %reg
3039 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3041 if (high_bits == 0
3042 || high_bits == 0xffffffff)
3044 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3045 (high_bits == 0xffffffff));
3046 return;
3049 /* Now, try 3-insn sequences. */
3051 /* 1) sethi %hi(high_bits), %reg
3052 * or %reg, %lo(high_bits), %reg
3053 * sllx %reg, 32, %reg
3055 if (low_bits == 0)
3057 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3058 return;
3061 /* We may be able to do something quick
3062 when the constant is negated, so try that. */
3063 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3064 (~low_bits) & 0xfffffc00))
3066 /* NOTE: The trailing bits get XOR'd so we need the
3067 non-negated bits, not the negated ones. */
3068 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3070 if ((((~high_bits) & 0xffffffff) == 0
3071 && ((~low_bits) & 0x80000000) == 0)
3072 || (((~high_bits) & 0xffffffff) == 0xffffffff
3073 && ((~low_bits) & 0x80000000) != 0))
3075 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3077 if ((SPARC_SETHI_P (fast_int)
3078 && (~high_bits & 0xffffffff) == 0)
3079 || SPARC_SIMM13_P (fast_int))
3080 emit_insn (gen_safe_SET64 (temp, fast_int));
3081 else
3082 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3084 else
3086 rtx negated_const;
3087 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3088 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3089 sparc_emit_set_const64 (temp, negated_const);
3092 /* If we are XOR'ing with -1, then we should emit a one's complement
3093 instead. This way the combiner will notice logical operations
3094 such as ANDN later on and substitute. */
3095 if (trailing_bits == 0x3ff)
3097 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3099 else
3101 emit_insn (gen_rtx_SET (op0,
3102 gen_safe_XOR64 (temp,
3103 (-0x400 | trailing_bits))));
3105 return;
3108 /* 1) sethi %hi(xxx), %reg
3109 * or %reg, %lo(xxx), %reg
3110 * sllx %reg, yyy, %reg
3112 * ??? This is just a generalized version of the low_bits==0
3113 * thing above, FIXME...
3115 if ((highest_bit_set - lowest_bit_set) < 32)
3117 unsigned HOST_WIDE_INT focus_bits =
3118 create_simple_focus_bits (high_bits, low_bits,
3119 lowest_bit_set, 0);
3121 /* We can't get here in this state. */
3122 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3124 /* So what we know is that the set bits straddle the
3125 middle of the 64-bit word. */
3126 sparc_emit_set_const64_quick2 (op0, temp,
3127 focus_bits, 0,
3128 lowest_bit_set);
3129 return;
3132 /* 1) sethi %hi(high_bits), %reg
3133 * or %reg, %lo(high_bits), %reg
3134 * sllx %reg, 32, %reg
3135 * or %reg, low_bits, %reg
3137 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3139 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3140 return;
3143 /* The easiest way when all else fails, is full decomposition. */
3144 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3147 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3149 static bool
3150 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3152 *p1 = SPARC_ICC_REG;
3153 *p2 = SPARC_FCC_REG;
3154 return true;
3157 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3159 static unsigned int
3160 sparc_min_arithmetic_precision (void)
3162 return 32;
3165 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3166 return the mode to be used for the comparison. For floating-point,
3167 CCFP[E]mode is used. CCNZmode should be used when the first operand
3168 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3169 processing is needed. */
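
/* For instance (illustrative only): on TARGET_ARCH64, comparing a DImode
   (plus ...) against const0_rtx selects CCXNZmode, while an ordinary
   SImode register comparison selects plain CCmode.  */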
3171 machine_mode
3172 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3174 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3176 switch (op)
3178 case EQ:
3179 case NE:
3180 case UNORDERED:
3181 case ORDERED:
3182 case UNLT:
3183 case UNLE:
3184 case UNGT:
3185 case UNGE:
3186 case UNEQ:
3187 case LTGT:
3188 return CCFPmode;
3190 case LT:
3191 case LE:
3192 case GT:
3193 case GE:
3194 return CCFPEmode;
3196 default:
3197 gcc_unreachable ();
3200 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3201 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3202 && y == const0_rtx)
3204 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3205 return CCXNZmode;
3206 else
3207 return CCNZmode;
3209 else
3211 /* This is for the cmp<mode>_sne pattern. */
3212 if (GET_CODE (x) == NOT && y == constm1_rtx)
3214 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3215 return CCXCmode;
3216 else
3217 return CCCmode;
3220 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3221 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3223 if (GET_CODE (y) == UNSPEC
3224 && (XINT (y, 1) == UNSPEC_ADDV
3225 || XINT (y, 1) == UNSPEC_SUBV
3226 || XINT (y, 1) == UNSPEC_NEGV))
3227 return CCVmode;
3228 else
3229 return CCCmode;
3232 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3233 return CCXmode;
3234 else
3235 return CCmode;
3239 /* Emit the compare insn and return the CC reg for a CODE comparison
3240 with operands X and Y. */
3242 static rtx
3243 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3245 machine_mode mode;
3246 rtx cc_reg;
3248 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3249 return x;
3251 mode = SELECT_CC_MODE (code, x, y);
3253 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3254 fcc regs (cse can't tell they're really call clobbered regs and will
3255 remove a duplicate comparison even if there is an intervening function
3256 call - it will then try to reload the cc reg via an int reg which is why
3257 we need the movcc patterns). It is possible to provide the movcc
3258 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3259 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3260 to tell cse that CCFPE mode registers (even pseudos) are call
3261 clobbered. */
3263 /* ??? This is an experiment. Rather than making changes to cse which may
3264 or may not be easy/clean, we do our own cse. This is possible because
3265 we will generate hard registers. Cse knows they're call clobbered (it
3266 doesn't know the same thing about pseudos). If we guess wrong, no big
3267 deal, but if we win, great! */
3269 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3270 #if 1 /* experiment */
3272 int reg;
3273 /* We cycle through the registers to ensure they're all exercised. */
3274 static int next_fcc_reg = 0;
3275 /* Previous x,y for each fcc reg. */
3276 static rtx prev_args[4][2];
3278 /* Scan prev_args for x,y. */
3279 for (reg = 0; reg < 4; reg++)
3280 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3281 break;
3282 if (reg == 4)
3284 reg = next_fcc_reg;
3285 prev_args[reg][0] = x;
3286 prev_args[reg][1] = y;
3287 next_fcc_reg = (next_fcc_reg + 1) & 3;
3289 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3291 #else
3292 cc_reg = gen_reg_rtx (mode);
3293 #endif /* ! experiment */
3294 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3295 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3296 else
3297 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3299 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3300 will only result in an unrecognizable insn so no point in asserting. */
3301 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3303 return cc_reg;
3307 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3310 gen_compare_reg (rtx cmp)
3312 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3315 /* This function is used for v9 only.
3316 DEST is the target of the Scc insn.
3317 CODE is the code for an Scc's comparison.
3318 X and Y are the values we compare.
3320 This function is needed to turn
3322 (set (reg:SI 110)
3323 (gt (reg:CCX 100 %icc)
3324 (const_int 0)))
3325 into
3326 (set (reg:SI 110)
3327 (gt:DI (reg:CCX 100 %icc)
3328 (const_int 0)))
3330    I.e. the instruction recognizer needs to see the mode of the comparison to
3331 find the right instruction. We could use "gt:DI" right in the
3332 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3334 static int
3335 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3337 if (! TARGET_ARCH64
3338 && (GET_MODE (x) == DImode
3339 || GET_MODE (dest) == DImode))
3340 return 0;
3342 /* Try to use the movrCC insns. */
3343 if (TARGET_ARCH64
3344 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3345 && y == const0_rtx
3346 && v9_regcmp_p (compare_code))
3348 rtx op0 = x;
3349 rtx temp;
3351 /* Special case for op0 != 0. This can be done with one instruction if
3352 dest == x. */
3354 if (compare_code == NE
3355 && GET_MODE (dest) == DImode
3356 && rtx_equal_p (op0, dest))
3358 emit_insn (gen_rtx_SET (dest,
3359 gen_rtx_IF_THEN_ELSE (DImode,
3360 gen_rtx_fmt_ee (compare_code, DImode,
3361 op0, const0_rtx),
3362 const1_rtx,
3363 dest)));
3364 return 1;
3367 if (reg_overlap_mentioned_p (dest, op0))
3369 /* Handle the case where dest == x.
3370 We "early clobber" the result. */
3371 op0 = gen_reg_rtx (GET_MODE (x));
3372 emit_move_insn (op0, x);
3375 emit_insn (gen_rtx_SET (dest, const0_rtx));
3376 if (GET_MODE (op0) != DImode)
3378 temp = gen_reg_rtx (DImode);
3379 convert_move (temp, op0, 0);
3381 else
3382 temp = op0;
3383 emit_insn (gen_rtx_SET (dest,
3384 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3385 gen_rtx_fmt_ee (compare_code, DImode,
3386 temp, const0_rtx),
3387 const1_rtx,
3388 dest)));
3389 return 1;
3391 else
3393 x = gen_compare_reg_1 (compare_code, x, y);
3394 y = const0_rtx;
3396 emit_insn (gen_rtx_SET (dest, const0_rtx));
3397 emit_insn (gen_rtx_SET (dest,
3398 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3399 gen_rtx_fmt_ee (compare_code,
3400 GET_MODE (x), x, y),
3401 const1_rtx, dest)));
3402 return 1;
3407 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3408 without jumps using the addx/subx instructions. */
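
/* A sketch of the classic idiom assumed here (v8 assembly, illustrative):

     sltu:  subcc %o0, %o1, %g0  ! set carry iff %o0 < %o1 unsigned
            addx  %g0, 0, %dest  ! dest = carry

     sgeu:  subcc %o0, %o1, %g0
            subx  %g0, -1, %dest ! dest = 1 - carry  */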
3410 bool
3411 emit_scc_insn (rtx operands[])
3413 rtx tem, x, y;
3414 enum rtx_code code;
3415 machine_mode mode;
3417 /* The quad-word fp compare library routines all return nonzero to indicate
3418 true, which is different from the equivalent libgcc routines, so we must
3419 handle them specially here. */
3420 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3422 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3423 GET_CODE (operands[1]));
3424 operands[2] = XEXP (operands[1], 0);
3425 operands[3] = XEXP (operands[1], 1);
3428 code = GET_CODE (operands[1]);
3429 x = operands[2];
3430 y = operands[3];
3431 mode = GET_MODE (x);
3433 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3434 more applications). The exception to this is "reg != 0" which can
3435 be done in one instruction on v9 (so we do it). */
3436 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3438 if (y != const0_rtx)
3439 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3441 rtx pat = gen_rtx_SET (operands[0],
3442 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3443 x, const0_rtx));
3445 /* If we can use addx/subx or addxc, add a clobber for CC. */
3446 if (mode == SImode || (code == NE && TARGET_VIS3))
3448 rtx clobber
3449 = gen_rtx_CLOBBER (VOIDmode,
3450 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3451 SPARC_ICC_REG));
3452 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3455 emit_insn (pat);
3456 return true;
3459 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3460 if (TARGET_ARCH64
3461 && mode == DImode
3462 && !((code == LTU || code == GTU) && TARGET_VIS3)
3463 && gen_v9_scc (operands[0], code, x, y))
3464 return true;
3466 /* We can do LTU and GEU using the addx/subx instructions too. And
3467      for GTU/LEU, if both operands are registers, swap them and fall
3468 back to the easy case. */
3469 if (code == GTU || code == LEU)
3471 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3472 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3474 tem = x;
3475 x = y;
3476 y = tem;
3477 code = swap_condition (code);
3481 if (code == LTU || code == GEU)
3483 emit_insn (gen_rtx_SET (operands[0],
3484 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3485 gen_compare_reg_1 (code, x, y),
3486 const0_rtx)));
3487 return true;
3490   /* All the possibilities to use addx/subx-based sequences have been
3491      exhausted; try for a 3-instruction sequence using v9 conditional
3492      moves.  */
3493 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3494 return true;
3496 /* Nope, do branches. */
3497 return false;
3500 /* Emit a conditional jump insn for the v9 architecture using comparison code
3501 CODE and jump target LABEL.
3502 This function exists to take advantage of the v9 brxx insns. */
3504 static void
3505 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3507 emit_jump_insn (gen_rtx_SET (pc_rtx,
3508 gen_rtx_IF_THEN_ELSE (VOIDmode,
3509 gen_rtx_fmt_ee (code, GET_MODE (op0),
3510 op0, const0_rtx),
3511 gen_rtx_LABEL_REF (VOIDmode, label),
3512 pc_rtx)));
3515 /* Emit a conditional jump insn for the UA2011 architecture using
3516 comparison code CODE and jump target LABEL. This function exists
3517 to take advantage of the UA2011 Compare and Branch insns. */
3519 static void
3520 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3522 rtx if_then_else;
3524 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3525 gen_rtx_fmt_ee(code, GET_MODE(op0),
3526 op0, op1),
3527 gen_rtx_LABEL_REF (VOIDmode, label),
3528 pc_rtx);
3530 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3533 void
3534 emit_conditional_branch_insn (rtx operands[])
3536 /* The quad-word fp compare library routines all return nonzero to indicate
3537 true, which is different from the equivalent libgcc routines, so we must
3538 handle them specially here. */
3539 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3541 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3542 GET_CODE (operands[0]));
3543 operands[1] = XEXP (operands[0], 0);
3544 operands[2] = XEXP (operands[0], 1);
3547 /* If we can tell early on that the comparison is against a constant
3548 that won't fit in the 5-bit signed immediate field of a cbcond,
3549 use one of the other v9 conditional branch sequences. */
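
/* (SPARC_SIMM5_P accepts the 5-bit signed range [-16, 15].)  */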
3550 if (TARGET_CBCOND
3551 && GET_CODE (operands[1]) == REG
3552 && (GET_MODE (operands[1]) == SImode
3553 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3554 && (GET_CODE (operands[2]) != CONST_INT
3555 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3557 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3558 return;
3561 if (TARGET_ARCH64 && operands[2] == const0_rtx
3562 && GET_CODE (operands[1]) == REG
3563 && GET_MODE (operands[1]) == DImode)
3565 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3566 return;
3569 operands[1] = gen_compare_reg (operands[0]);
3570 operands[2] = const0_rtx;
3571 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3572 operands[1], operands[2]);
3573 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3574 operands[3]));
3578 /* Generate a DFmode part of a hard TFmode register.
3579 REG is the TFmode hard register, LOW is 1 for the
3580    low 64 bits of the register and 0 otherwise.
3583 gen_df_reg (rtx reg, int low)
3585 int regno = REGNO (reg);
3587 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3588 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3589 return gen_rtx_REG (DFmode, regno);
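
/* E.g. (big-endian SPARC, register chosen for illustration): for a
   TFmode value living in %f4, gen_df_reg returns %f4 for the high
   DFmode half (LOW == 0) and %f6 for the low half (LOW == 1).  */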
3592 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3593 Unlike normal calls, TFmode operands are passed by reference. It is
3594 assumed that no more than 3 operands are required. */
3596 static void
3597 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3599 rtx ret_slot = NULL, arg[3], func_sym;
3600 int i;
3602 /* We only expect to be called for conversions, unary, and binary ops. */
3603 gcc_assert (nargs == 2 || nargs == 3);
3605 for (i = 0; i < nargs; ++i)
3607 rtx this_arg = operands[i];
3608 rtx this_slot;
3610 /* TFmode arguments and return values are passed by reference. */
3611 if (GET_MODE (this_arg) == TFmode)
3613 int force_stack_temp;
3615 force_stack_temp = 0;
3616 if (TARGET_BUGGY_QP_LIB && i == 0)
3617 force_stack_temp = 1;
3619 if (GET_CODE (this_arg) == MEM
3620 && ! force_stack_temp)
3622 tree expr = MEM_EXPR (this_arg);
3623 if (expr)
3624 mark_addressable (expr);
3625 this_arg = XEXP (this_arg, 0);
3627 else if (CONSTANT_P (this_arg)
3628 && ! force_stack_temp)
3630 this_slot = force_const_mem (TFmode, this_arg);
3631 this_arg = XEXP (this_slot, 0);
3633 else
3635 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3637 /* Operand 0 is the return value. We'll copy it out later. */
3638 if (i > 0)
3639 emit_move_insn (this_slot, this_arg);
3640 else
3641 ret_slot = this_slot;
3643 this_arg = XEXP (this_slot, 0);
3647 arg[i] = this_arg;
3650 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3652 if (GET_MODE (operands[0]) == TFmode)
3654 if (nargs == 2)
3655 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3656 arg[0], GET_MODE (arg[0]),
3657 arg[1], GET_MODE (arg[1]));
3658 else
3659 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3660 arg[0], GET_MODE (arg[0]),
3661 arg[1], GET_MODE (arg[1]),
3662 arg[2], GET_MODE (arg[2]));
3664 if (ret_slot)
3665 emit_move_insn (operands[0], ret_slot);
3667 else
3669 rtx ret;
3671 gcc_assert (nargs == 2);
3673 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3674 GET_MODE (operands[0]),
3675 arg[1], GET_MODE (arg[1]));
3677 if (ret != operands[0])
3678 emit_move_insn (operands[0], ret);
3682 /* Expand soft-float TFmode calls to SPARC ABI routines.  */
3684 static void
3685 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3687 const char *func;
3689 switch (code)
3691 case PLUS:
3692 func = "_Qp_add";
3693 break;
3694 case MINUS:
3695 func = "_Qp_sub";
3696 break;
3697 case MULT:
3698 func = "_Qp_mul";
3699 break;
3700 case DIV:
3701 func = "_Qp_div";
3702 break;
3703 default:
3704 gcc_unreachable ();
3707 emit_soft_tfmode_libcall (func, 3, operands);
3710 static void
3711 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3713 const char *func;
3715 gcc_assert (code == SQRT);
3716 func = "_Qp_sqrt";
3718 emit_soft_tfmode_libcall (func, 2, operands);
3721 static void
3722 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3724 const char *func;
3726 switch (code)
3728 case FLOAT_EXTEND:
3729 switch (GET_MODE (operands[1]))
3731 case E_SFmode:
3732 func = "_Qp_stoq";
3733 break;
3734 case E_DFmode:
3735 func = "_Qp_dtoq";
3736 break;
3737 default:
3738 gcc_unreachable ();
3740 break;
3742 case FLOAT_TRUNCATE:
3743 switch (GET_MODE (operands[0]))
3745 case E_SFmode:
3746 func = "_Qp_qtos";
3747 break;
3748 case E_DFmode:
3749 func = "_Qp_qtod";
3750 break;
3751 default:
3752 gcc_unreachable ();
3754 break;
3756 case FLOAT:
3757 switch (GET_MODE (operands[1]))
3759 case E_SImode:
3760 func = "_Qp_itoq";
3761 if (TARGET_ARCH64)
3762 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3763 break;
3764 case E_DImode:
3765 func = "_Qp_xtoq";
3766 break;
3767 default:
3768 gcc_unreachable ();
3770 break;
3772 case UNSIGNED_FLOAT:
3773 switch (GET_MODE (operands[1]))
3775 case E_SImode:
3776 func = "_Qp_uitoq";
3777 if (TARGET_ARCH64)
3778 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3779 break;
3780 case E_DImode:
3781 func = "_Qp_uxtoq";
3782 break;
3783 default:
3784 gcc_unreachable ();
3786 break;
3788 case FIX:
3789 switch (GET_MODE (operands[0]))
3791 case E_SImode:
3792 func = "_Qp_qtoi";
3793 break;
3794 case E_DImode:
3795 func = "_Qp_qtox";
3796 break;
3797 default:
3798 gcc_unreachable ();
3800 break;
3802 case UNSIGNED_FIX:
3803 switch (GET_MODE (operands[0]))
3805 case E_SImode:
3806 func = "_Qp_qtoui";
3807 break;
3808 case E_DImode:
3809 func = "_Qp_qtoux";
3810 break;
3811 default:
3812 gcc_unreachable ();
3814 break;
3816 default:
3817 gcc_unreachable ();
3820 emit_soft_tfmode_libcall (func, 2, operands);
3823 /* Expand a hard-float TFmode operation. All arguments must be in
3824 registers. */
3826 static void
3827 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3829 rtx op, dest;
3831 if (GET_RTX_CLASS (code) == RTX_UNARY)
3833 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3834 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3836 else
3838 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3839 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3840 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3841 operands[1], operands[2]);
3844 if (register_operand (operands[0], VOIDmode))
3845 dest = operands[0];
3846 else
3847 dest = gen_reg_rtx (GET_MODE (operands[0]));
3849 emit_insn (gen_rtx_SET (dest, op));
3851 if (dest != operands[0])
3852 emit_move_insn (operands[0], dest);
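/* As an illustration (register choices arbitrary), a call with code ==
   PLUS and all operands in quad FP registers emits a single insn like

     (set (reg:TF %f0) (plus:TF (reg:TF %f4) (reg:TF %f8)))

   which matches the addtf3 pattern and assembles to faddq when
   TARGET_HARD_QUAD is enabled.  */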
3855 void
3856 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3858 if (TARGET_HARD_QUAD)
3859 emit_hard_tfmode_operation (code, operands);
3860 else
3861 emit_soft_tfmode_binop (code, operands);
3864 void
3865 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3867 if (TARGET_HARD_QUAD)
3868 emit_hard_tfmode_operation (code, operands);
3869 else
3870 emit_soft_tfmode_unop (code, operands);
3873 void
3874 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3876 if (TARGET_HARD_QUAD)
3877 emit_hard_tfmode_operation (code, operands);
3878 else
3879 emit_soft_tfmode_cvt (code, operands);
3882 /* Return nonzero if a branch/jump/call instruction will emit a nop
3883 into its delay slot. */
3885 int
3886 empty_delay_slot (rtx_insn *insn)
3888 rtx seq;
3890 /* If no previous instruction (should not happen), return true. */
3891 if (PREV_INSN (insn) == NULL)
3892 return 1;
3894 seq = NEXT_INSN (PREV_INSN (insn));
3895 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3896 return 0;
3898 return 1;
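/* The reasoning above: once the delay-slot scheduler fills a slot, the
   branch and its filler are wrapped into a single pattern, conceptually

     (sequence [(jump_insn ...) (insn ...)])

   and INSN becomes an element of that SEQUENCE, so NEXT_INSN (PREV_INSN
   (insn)) returns the enclosing SEQUENCE insn rather than INSN itself.
   Seeing no SEQUENCE therefore means the slot stayed empty.  */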
3901 /* Return nonzero if we should emit a nop after a cbcond instruction.
3902 The cbcond instruction does not have a delay slot; however, there is
3903 a severe performance penalty if a control transfer appears right
3904 after a cbcond. Therefore we emit a nop when we detect this
3905 situation. */
3907 int
3908 emit_cbcond_nop (rtx_insn *insn)
3910 rtx next = next_active_insn (insn);
3912 if (!next)
3913 return 1;
3915 if (NONJUMP_INSN_P (next)
3916 && GET_CODE (PATTERN (next)) == SEQUENCE)
3917 next = XVECEXP (PATTERN (next), 0, 0);
3918 else if (CALL_P (next)
3919 && GET_CODE (PATTERN (next)) == PARALLEL)
3921 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3923 if (GET_CODE (delay) == RETURN)
3925 /* It's a sibling call. Do not emit the nop if we're going
3926 to emit something other than the jump itself as the first
3927 instruction of the sibcall sequence. */
3928 if (sparc_leaf_function_p || TARGET_FLAT)
3929 return 0;
3933 if (NONJUMP_INSN_P (next))
3934 return 0;
3936 return 1;
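/* Illustrative effect on the assembly output (labels arbitrary):
   without the extra nop we could emit

     cwbne  %o0, %o1, .L5
     ba     .L7

   i.e. a control transfer right after the cbcond; with it we emit

     cwbne  %o0, %o1, .L5
     nop
     ba     .L7

   avoiding the penalty described above.  */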
3939 /* Return nonzero if TRIAL can go into the call delay slot. */
3941 int
3942 eligible_for_call_delay (rtx_insn *trial)
3944 rtx pat;
3946 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3947 return 0;
3949 /* Binutils allows
3950 call __tls_get_addr, %tgd_call (foo)
3951 add %l7, %o0, %o0, %tgd_add (foo)
3952 while Sun as/ld does not. */
3953 if (TARGET_GNU_TLS || !TARGET_TLS)
3954 return 1;
3956 pat = PATTERN (trial);
3958 /* We must reject tgd_add{32|64}, i.e.
3959 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3960 and tldm_add{32|64}, i.e.
3961 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3962 for Sun as/ld. */
3963 if (GET_CODE (pat) == SET
3964 && GET_CODE (SET_SRC (pat)) == PLUS)
3966 rtx unspec = XEXP (SET_SRC (pat), 1);
3968 if (GET_CODE (unspec) == UNSPEC
3969 && (XINT (unspec, 1) == UNSPEC_TLSGD
3970 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3971 return 0;
3974 return 1;
3977 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3978 instruction. RETURN_P is true if the v9 variant 'return' is to be
3979 considered in the test too.
3981 TRIAL must be a SET whose destination is a REG appropriate for the
3982 'restore' instruction or, if RETURN_P is true, for the 'return'
3983 instruction. */
3985 static int
3986 eligible_for_restore_insn (rtx trial, bool return_p)
3988 rtx pat = PATTERN (trial);
3989 rtx src = SET_SRC (pat);
3990 bool src_is_freg = false;
3991 rtx src_reg;
3993 /* Since we can now do moves between float and integer registers when
3994 VIS3 is enabled, we have to catch this case. We can allow such
3995 moves when doing a 'return', however. */
3996 src_reg = src;
3997 if (GET_CODE (src_reg) == SUBREG)
3998 src_reg = SUBREG_REG (src_reg);
3999 if (GET_CODE (src_reg) == REG
4000 && SPARC_FP_REG_P (REGNO (src_reg)))
4001 src_is_freg = true;
4003 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4004 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4005 && arith_operand (src, GET_MODE (src))
4006 && ! src_is_freg)
4008 if (TARGET_ARCH64)
4009 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4010 else
4011 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4014 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4015 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4016 && arith_double_operand (src, GET_MODE (src))
4017 && ! src_is_freg)
4018 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4020 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4021 else if (! TARGET_FPU && register_operand (src, SFmode))
4022 return 1;
4024 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4025 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4026 return 1;
4028 /* If we have the 'return' instruction, anything that does not use
4029 local or output registers and can go into a delay slot wins. */
4030 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4031 return 1;
4033 /* The 'restore src1,src2,dest' pattern for SImode. */
4034 else if (GET_CODE (src) == PLUS
4035 && register_operand (XEXP (src, 0), SImode)
4036 && arith_operand (XEXP (src, 1), SImode))
4037 return 1;
4039 /* The 'restore src1,src2,dest' pattern for DImode. */
4040 else if (GET_CODE (src) == PLUS
4041 && register_operand (XEXP (src, 0), DImode)
4042 && arith_double_operand (XEXP (src, 1), DImode))
4043 return 1;
4045 /* The 'restore src1,%lo(src2),dest' pattern. */
4046 else if (GET_CODE (src) == LO_SUM
4047 && ! TARGET_CM_MEDMID
4048 && ((register_operand (XEXP (src, 0), SImode)
4049 && immediate_operand (XEXP (src, 1), SImode))
4050 || (TARGET_ARCH64
4051 && register_operand (XEXP (src, 0), DImode)
4052 && immediate_operand (XEXP (src, 1), DImode))))
4053 return 1;
4055 /* The 'restore src,src,dest' pattern. */
4056 else if (GET_CODE (src) == ASHIFT
4057 && (register_operand (XEXP (src, 0), SImode)
4058 || register_operand (XEXP (src, 0), DImode))
4059 && XEXP (src, 1) == const1_rtx)
4060 return 1;
4062 return 0;
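/* As an example of the 'restore src,%g0,dest' case above, a final move
   of the return value can be folded into the epilogue as

     jmp      %i7+8
      restore %o0, %g0, %o0   ! move the result while popping the window

   instead of a separate mov followed by ret/restore.  */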
4065 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4067 int
4068 eligible_for_return_delay (rtx_insn *trial)
4070 int regno;
4071 rtx pat;
4073 /* If the function uses __builtin_eh_return, the eh_return machinery
4074 occupies the delay slot. */
4075 if (crtl->calls_eh_return)
4076 return 0;
4078 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4079 return 0;
4081 /* In the case of a leaf or flat function, anything can go into the slot. */
4082 if (sparc_leaf_function_p || TARGET_FLAT)
4083 return 1;
4085 if (!NONJUMP_INSN_P (trial))
4086 return 0;
4088 pat = PATTERN (trial);
4089 if (GET_CODE (pat) == PARALLEL)
4091 int i;
4093 if (! TARGET_V9)
4094 return 0;
4095 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4097 rtx expr = XVECEXP (pat, 0, i);
4098 if (GET_CODE (expr) != SET)
4099 return 0;
4100 if (GET_CODE (SET_DEST (expr)) != REG)
4101 return 0;
4102 regno = REGNO (SET_DEST (expr));
4103 if (regno >= 8 && regno < 24)
4104 return 0;
4106 return !epilogue_renumber (&pat, 1);
4109 if (GET_CODE (pat) != SET)
4110 return 0;
4112 if (GET_CODE (SET_DEST (pat)) != REG)
4113 return 0;
4115 regno = REGNO (SET_DEST (pat));
4117 /* Otherwise, only operations which can be done in tandem with
4118 a `restore' or `return' insn can go into the delay slot. */
4119 if (regno >= 8 && regno < 24)
4120 return 0;
4122 /* If this instruction sets up a floating-point register and we have a
4123 return instruction, it can probably go in. But restore will not work
4124 with FP_REGS. */
4125 if (! SPARC_INT_REG_P (regno))
4126 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4128 return eligible_for_restore_insn (trial, true);
4131 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4133 int
4134 eligible_for_sibcall_delay (rtx_insn *trial)
4136 rtx pat;
4138 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4139 return 0;
4141 if (!NONJUMP_INSN_P (trial))
4142 return 0;
4144 pat = PATTERN (trial);
4146 if (sparc_leaf_function_p || TARGET_FLAT)
4148 /* If the tail call is done using the call instruction,
4149 we have to restore %o7 in the delay slot. */
4150 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4151 return 0;
4153 /* %g1 is used to build the function address */
4154 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4155 return 0;
4157 return 1;
4160 if (GET_CODE (pat) != SET)
4161 return 0;
4163 /* Otherwise, only operations which can be done in tandem with
4164 a `restore' insn can go into the delay slot. */
4165 if (GET_CODE (SET_DEST (pat)) != REG
4166 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4167 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4168 return 0;
4170 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4171 in most cases. */
4172 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4173 return 0;
4175 return eligible_for_restore_insn (trial, false);
4178 /* Determine if it's legal to put X into the constant pool. This
4179 is not possible if X contains the address of a symbol that is
4180 not constant (TLS) or not known at final link time (PIC). */
4182 static bool
4183 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4185 switch (GET_CODE (x))
4187 case CONST_INT:
4188 case CONST_WIDE_INT:
4189 case CONST_DOUBLE:
4190 case CONST_VECTOR:
4191 /* Accept all non-symbolic constants. */
4192 return false;
4194 case LABEL_REF:
4195 /* Labels are OK iff we are non-PIC. */
4196 return flag_pic != 0;
4198 case SYMBOL_REF:
4199 /* 'Naked' TLS symbol references are never OK;
4200 non-TLS symbols are OK iff we are non-PIC. */
4201 if (SYMBOL_REF_TLS_MODEL (x))
4202 return true;
4203 else
4204 return flag_pic != 0;
4206 case CONST:
4207 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4208 case PLUS:
4209 case MINUS:
4210 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4211 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4212 case UNSPEC:
4213 return true;
4214 default:
4215 gcc_unreachable ();
4219 /* Global Offset Table support. */
4220 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4221 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4223 /* Return the SYMBOL_REF for the Global Offset Table. */
4225 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4227 static rtx
4228 sparc_got (void)
4230 if (!sparc_got_symbol)
4231 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4233 return sparc_got_symbol;
4236 /* Ensure that we are not using patterns that are not OK with PIC. */
4238 int
4239 check_pic (int i)
4241 rtx op;
4243 switch (flag_pic)
4245 case 1:
4246 op = recog_data.operand[i];
4247 gcc_assert (GET_CODE (op) != SYMBOL_REF
4248 && (GET_CODE (op) != CONST
4249 || (GET_CODE (XEXP (op, 0)) == MINUS
4250 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4251 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4252 /* fallthrough */
4253 case 2:
4254 default:
4255 return 1;
4259 /* Return true if X is an address which needs a temporary register when
4260 reloaded while generating PIC code. */
4262 int
4263 pic_address_needs_scratch (rtx x)
4265 /* An address which is a symbol plus a non-SMALL_INT constant needs a temp reg. */
4266 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4267 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4268 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4269 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4270 return 1;
4272 return 0;
4275 /* Determine if a given RTX is a valid constant. We already know this
4276 satisfies CONSTANT_P. */
4278 static bool
4279 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4281 switch (GET_CODE (x))
4283 case CONST:
4284 case SYMBOL_REF:
4285 if (sparc_tls_referenced_p (x))
4286 return false;
4287 break;
4289 case CONST_DOUBLE:
4290 /* Floating-point constants are generally not OK.
4291 The only exceptions are 0.0 and all-ones in VIS. */
4292 if (TARGET_VIS
4293 && SCALAR_FLOAT_MODE_P (mode)
4294 && (const_zero_operand (x, mode)
4295 || const_all_ones_operand (x, mode)))
4296 return true;
4298 return false;
4300 case CONST_VECTOR:
4301 /* Vector constants are generally not OK.
4302 The only exceptions are 0 and -1 in VIS. */
4303 if (TARGET_VIS
4304 && (const_zero_operand (x, mode)
4305 || const_all_ones_operand (x, mode)))
4306 return true;
4308 return false;
4310 default:
4311 break;
4314 return true;
4317 /* Determine if a given RTX is a valid constant address. */
4319 bool
4320 constant_address_p (rtx x)
4322 switch (GET_CODE (x))
4324 case LABEL_REF:
4325 case CONST_INT:
4326 case HIGH:
4327 return true;
4329 case CONST:
4330 if (flag_pic && pic_address_needs_scratch (x))
4331 return false;
4332 return sparc_legitimate_constant_p (Pmode, x);
4334 case SYMBOL_REF:
4335 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4337 default:
4338 return false;
4342 /* Nonzero if the constant value X is a legitimate general operand
4343 when generating PIC code. It is given that flag_pic is on and
4344 that X satisfies CONSTANT_P. */
4346 bool
4347 legitimate_pic_operand_p (rtx x)
4349 if (pic_address_needs_scratch (x))
4350 return false;
4351 if (sparc_tls_referenced_p (x))
4352 return false;
4353 return true;
4356 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4357 (CONST_INT_P (X) \
4358 && INTVAL (X) >= -0x1000 \
4359 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4361 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4362 (CONST_INT_P (X) \
4363 && INTVAL (X) >= -0x1000 \
4364 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
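/* These bounds come from the 13-bit signed immediate field of loads and
   stores: it spans -4096..4095, and subtracting GET_MODE_SIZE keeps the
   last byte of the access in range (e.g. at most 4088 for an 8-byte
   DFmode access).  The OLO10 variant caps the offset at 0xc00 - size
   because it is later combined with a %lo() value of at most 0x3ff (see
   USE_AS_OFFSETABLE_LO10 below), keeping the sum within 13 bits.  */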
4366 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4368 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4369 ordinarily. This changes a bit when generating PIC. */
4371 static bool
4372 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4374 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4376 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4377 rs1 = addr;
4378 else if (GET_CODE (addr) == PLUS)
4380 rs1 = XEXP (addr, 0);
4381 rs2 = XEXP (addr, 1);
4383 /* Canonicalize. REG comes first; if there are no regs,
4384 LO_SUM comes first. */
4385 if (!REG_P (rs1)
4386 && GET_CODE (rs1) != SUBREG
4387 && (REG_P (rs2)
4388 || GET_CODE (rs2) == SUBREG
4389 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4391 rs1 = XEXP (addr, 1);
4392 rs2 = XEXP (addr, 0);
4395 if ((flag_pic == 1
4396 && rs1 == pic_offset_table_rtx
4397 && !REG_P (rs2)
4398 && GET_CODE (rs2) != SUBREG
4399 && GET_CODE (rs2) != LO_SUM
4400 && GET_CODE (rs2) != MEM
4401 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4402 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4403 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4404 || ((REG_P (rs1)
4405 || GET_CODE (rs1) == SUBREG)
4406 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4408 imm1 = rs2;
4409 rs2 = NULL;
4411 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4412 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4414 /* We prohibit REG + REG for TFmode when there are no quad move insns
4415 and we consequently need to split. We do this because REG+REG
4416 is not an offsettable address. If we get the situation in reload
4417 where source and destination of a movtf pattern are both MEMs with
4418 REG+REG address, then only one of them gets converted to an
4419 offsettable address. */
4420 if (mode == TFmode
4421 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4422 return 0;
4424 /* Likewise for TImode, but in all cases. */
4425 if (mode == TImode)
4426 return 0;
4428 /* We prohibit REG + REG on ARCH32 if not optimizing for
4429 DFmode/DImode because then mem_min_alignment is likely to be zero
4430 after reload and the forced split would lack a matching splitter
4431 pattern. */
4432 if (TARGET_ARCH32 && !optimize
4433 && (mode == DFmode || mode == DImode))
4434 return 0;
4436 else if (USE_AS_OFFSETABLE_LO10
4437 && GET_CODE (rs1) == LO_SUM
4438 && TARGET_ARCH64
4439 && ! TARGET_CM_MEDMID
4440 && RTX_OK_FOR_OLO10_P (rs2, mode))
4442 rs2 = NULL;
4443 imm1 = XEXP (rs1, 1);
4444 rs1 = XEXP (rs1, 0);
4445 if (!CONSTANT_P (imm1)
4446 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4447 return 0;
4450 else if (GET_CODE (addr) == LO_SUM)
4452 rs1 = XEXP (addr, 0);
4453 imm1 = XEXP (addr, 1);
4455 if (!CONSTANT_P (imm1)
4456 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4457 return 0;
4459 /* We can't allow TFmode in 32-bit mode, because an offset greater
4460 than the alignment (8) may cause the LO_SUM to overflow. */
4461 if (mode == TFmode && TARGET_ARCH32)
4462 return 0;
4464 /* During reload, accept the HIGH+LO_SUM construct generated by
4465 sparc_legitimize_reload_address. */
4466 if (reload_in_progress
4467 && GET_CODE (rs1) == HIGH
4468 && XEXP (rs1, 0) == imm1)
4469 return 1;
4471 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4472 return 1;
4473 else
4474 return 0;
4476 if (GET_CODE (rs1) == SUBREG)
4477 rs1 = SUBREG_REG (rs1);
4478 if (!REG_P (rs1))
4479 return 0;
4481 if (rs2)
4483 if (GET_CODE (rs2) == SUBREG)
4484 rs2 = SUBREG_REG (rs2);
4485 if (!REG_P (rs2))
4486 return 0;
4489 if (strict)
4491 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4492 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4493 return 0;
4495 else
4497 if ((! SPARC_INT_REG_P (REGNO (rs1))
4498 && REGNO (rs1) != FRAME_POINTER_REGNUM
4499 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4500 || (rs2
4501 && (! SPARC_INT_REG_P (REGNO (rs2))
4502 && REGNO (rs2) != FRAME_POINTER_REGNUM
4503 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4504 return 0;
4506 return 1;
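/* A few illustrative outcomes of the predicate above (register numbers
   arbitrary):

     [%l1 + %l2]       REG+REG        -> OK, except for TFmode/TImode
     [%fp - 8]         REG+simm13     -> OK
     [%g1 + 4096]      offset > 4095  -> rejected
     [%g1 + %lo(sym)]  LO_SUM         -> OK unless SYM is TLS-referenced

   subject to the additional mode and strictness checks above.  */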
4509 /* Return the SYMBOL_REF for the tls_get_addr function. */
4511 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4513 static rtx
4514 sparc_tls_get_addr (void)
4516 if (!sparc_tls_symbol)
4517 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4519 return sparc_tls_symbol;
4522 /* Return the Global Offset Table to be used in TLS mode. */
4524 static rtx
4525 sparc_tls_got (void)
4527 /* In PIC mode, this is just the PIC offset table. */
4528 if (flag_pic)
4530 crtl->uses_pic_offset_table = 1;
4531 return pic_offset_table_rtx;
4534 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4535 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4536 if (TARGET_SUN_TLS && TARGET_ARCH32)
4538 load_got_register ();
4539 return global_offset_table_rtx;
4542 /* In all other cases, we load a new pseudo with the GOT symbol. */
4543 return copy_to_reg (sparc_got ());
4546 /* Return true if X contains a thread-local symbol. */
4548 static bool
4549 sparc_tls_referenced_p (rtx x)
4551 if (!TARGET_HAVE_TLS)
4552 return false;
4554 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4555 x = XEXP (XEXP (x, 0), 0);
4557 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4558 return true;
4560 /* That's all we handle in sparc_legitimize_tls_address for now. */
4561 return false;
4564 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4565 this (thread-local) address. */
4567 static rtx
4568 sparc_legitimize_tls_address (rtx addr)
4570 rtx temp1, temp2, temp3, ret, o0, got;
4571 rtx_insn *insn;
4573 gcc_assert (can_create_pseudo_p ());
4575 if (GET_CODE (addr) == SYMBOL_REF)
4576 switch (SYMBOL_REF_TLS_MODEL (addr))
4578 case TLS_MODEL_GLOBAL_DYNAMIC:
4579 start_sequence ();
4580 temp1 = gen_reg_rtx (SImode);
4581 temp2 = gen_reg_rtx (SImode);
4582 ret = gen_reg_rtx (Pmode);
4583 o0 = gen_rtx_REG (Pmode, 8);
4584 got = sparc_tls_got ();
4585 emit_insn (gen_tgd_hi22 (temp1, addr));
4586 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4587 if (TARGET_ARCH32)
4589 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4590 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4591 addr, const1_rtx));
4593 else
4595 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4596 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4597 addr, const1_rtx));
4599 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4600 insn = get_insns ();
4601 end_sequence ();
4602 emit_libcall_block (insn, ret, o0, addr);
4603 break;
4605 case TLS_MODEL_LOCAL_DYNAMIC:
4606 start_sequence ();
4607 temp1 = gen_reg_rtx (SImode);
4608 temp2 = gen_reg_rtx (SImode);
4609 temp3 = gen_reg_rtx (Pmode);
4610 ret = gen_reg_rtx (Pmode);
4611 o0 = gen_rtx_REG (Pmode, 8);
4612 got = sparc_tls_got ();
4613 emit_insn (gen_tldm_hi22 (temp1));
4614 emit_insn (gen_tldm_lo10 (temp2, temp1));
4615 if (TARGET_ARCH32)
4617 emit_insn (gen_tldm_add32 (o0, got, temp2));
4618 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4619 const1_rtx));
4621 else
4623 emit_insn (gen_tldm_add64 (o0, got, temp2));
4624 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4625 const1_rtx));
4627 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4628 insn = get_insns ();
4629 end_sequence ();
4630 emit_libcall_block (insn, temp3, o0,
4631 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4632 UNSPEC_TLSLD_BASE));
4633 temp1 = gen_reg_rtx (SImode);
4634 temp2 = gen_reg_rtx (SImode);
4635 emit_insn (gen_tldo_hix22 (temp1, addr));
4636 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4637 if (TARGET_ARCH32)
4638 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4639 else
4640 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4641 break;
4643 case TLS_MODEL_INITIAL_EXEC:
4644 temp1 = gen_reg_rtx (SImode);
4645 temp2 = gen_reg_rtx (SImode);
4646 temp3 = gen_reg_rtx (Pmode);
4647 got = sparc_tls_got ();
4648 emit_insn (gen_tie_hi22 (temp1, addr));
4649 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4650 if (TARGET_ARCH32)
4651 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4652 else
4653 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4654 if (TARGET_SUN_TLS)
4656 ret = gen_reg_rtx (Pmode);
4657 if (TARGET_ARCH32)
4658 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4659 temp3, addr));
4660 else
4661 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4662 temp3, addr));
4664 else
4665 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4666 break;
4668 case TLS_MODEL_LOCAL_EXEC:
4669 temp1 = gen_reg_rtx (Pmode);
4670 temp2 = gen_reg_rtx (Pmode);
4671 if (TARGET_ARCH32)
4673 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4674 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4676 else
4678 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4679 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4681 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4682 break;
4684 default:
4685 gcc_unreachable ();
4688 else if (GET_CODE (addr) == CONST)
4690 rtx base, offset;
4692 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4694 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4695 offset = XEXP (XEXP (addr, 0), 1);
4697 base = force_operand (base, NULL_RTX);
4698 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4699 offset = force_reg (Pmode, offset);
4700 ret = gen_rtx_PLUS (Pmode, base, offset);
4703 else
4704 gcc_unreachable (); /* for now ... */
4706 return ret;
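/* As a concrete illustration (32-bit PIC, global-dynamic model, register
   choices arbitrary), the code emitted for a TLS variable X is roughly

     sethi  %tgd_hi22(x), %g1
     add    %g1, %tgd_lo10(x), %g2
     add    %l7, %g2, %o0, %tgd_add(x)
     call   __tls_get_addr, %tgd_call(x)
      nop

   with the address of X returned in %o0.  */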
4709 /* Legitimize PIC addresses. If the address is already position-independent,
4710 we return ORIG. Newly generated position-independent addresses go into a
4711 reg. This is REG if nonzero, otherwise we allocate register(s) as
4712 necessary. */
4714 static rtx
4715 sparc_legitimize_pic_address (rtx orig, rtx reg)
4717 bool gotdata_op = false;
4719 if (GET_CODE (orig) == SYMBOL_REF
4720 /* See the comment in sparc_expand_move. */
4721 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4723 rtx pic_ref, address;
4724 rtx_insn *insn;
4726 if (reg == 0)
4728 gcc_assert (can_create_pseudo_p ());
4729 reg = gen_reg_rtx (Pmode);
4732 if (flag_pic == 2)
4734 /* If not during reload, allocate another temp reg here for loading
4735 in the address, so that these instructions can be optimized
4736 properly. */
4737 rtx temp_reg = (! can_create_pseudo_p ()
4738 ? reg : gen_reg_rtx (Pmode));
4740 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4741 won't get confused into thinking that these two instructions
4742 are loading in the true address of the symbol. If in the
4743 future a PIC rtx exists, that should be used instead. */
4744 if (TARGET_ARCH64)
4746 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4747 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4749 else
4751 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4752 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4754 address = temp_reg;
4755 gotdata_op = true;
4757 else
4758 address = orig;
4760 crtl->uses_pic_offset_table = 1;
4761 if (gotdata_op)
4763 if (TARGET_ARCH64)
4764 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4765 pic_offset_table_rtx,
4766 address, orig));
4767 else
4768 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4769 pic_offset_table_rtx,
4770 address, orig));
4772 else
4774 pic_ref
4775 = gen_const_mem (Pmode,
4776 gen_rtx_PLUS (Pmode,
4777 pic_offset_table_rtx, address));
4778 insn = emit_move_insn (reg, pic_ref);
4781 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4782 by the loop pass. */
4783 set_unique_reg_note (insn, REG_EQUAL, orig);
4784 return reg;
4786 else if (GET_CODE (orig) == CONST)
4788 rtx base, offset;
4790 if (GET_CODE (XEXP (orig, 0)) == PLUS
4791 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4792 return orig;
4794 if (reg == 0)
4796 gcc_assert (can_create_pseudo_p ());
4797 reg = gen_reg_rtx (Pmode);
4800 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4801 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4802 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4803 base == reg ? NULL_RTX : reg);
4805 if (GET_CODE (offset) == CONST_INT)
4807 if (SMALL_INT (offset))
4808 return plus_constant (Pmode, base, INTVAL (offset));
4809 else if (can_create_pseudo_p ())
4810 offset = force_reg (Pmode, offset);
4811 else
4812 /* If we reach here, then something is seriously wrong. */
4813 gcc_unreachable ();
4815 return gen_rtx_PLUS (Pmode, base, offset);
4817 else if (GET_CODE (orig) == LABEL_REF)
4818 /* ??? We ought to be checking that the register is live instead, in case
4819 it is eliminated. */
4820 crtl->uses_pic_offset_table = 1;
4822 return orig;
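/* Illustrative output for a global SYM (register names arbitrary): with
   -fpic, a single GOT load is emitted,

     ld     [%l7 + sym], %g1

   while the flag_pic == 2 path above produces, when the assembler
   supports the GOTDATA relocations, roughly

     sethi  %gdop_hix22(sym), %g1
     xor    %g1, %gdop_lox10(sym), %g1
     ld     [%l7 + %g1], %g2, %gdop(sym)  */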
4825 /* Try machine-dependent ways of modifying an illegitimate address X
4826 to be legitimate. If we find one, return the new, valid address.
4828 OLDX is the address as it was before break_out_memory_refs was called.
4829 In some cases it is useful to look at this to decide what needs to be done.
4831 MODE is the mode of the operand pointed to by X.
4833 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4835 static rtx
4836 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4837 machine_mode mode)
4839 rtx orig_x = x;
4841 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4842 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4843 force_operand (XEXP (x, 0), NULL_RTX));
4844 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4845 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4846 force_operand (XEXP (x, 1), NULL_RTX));
4847 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4848 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4849 XEXP (x, 1));
4850 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4851 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4852 force_operand (XEXP (x, 1), NULL_RTX));
4854 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4855 return x;
4857 if (sparc_tls_referenced_p (x))
4858 x = sparc_legitimize_tls_address (x);
4859 else if (flag_pic)
4860 x = sparc_legitimize_pic_address (x, NULL_RTX);
4861 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4862 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4863 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4864 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4865 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4866 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4867 else if (GET_CODE (x) == SYMBOL_REF
4868 || GET_CODE (x) == CONST
4869 || GET_CODE (x) == LABEL_REF)
4870 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4872 return x;
4875 /* Delegitimize an address that was legitimized by the above function. */
4877 static rtx
4878 sparc_delegitimize_address (rtx x)
4880 x = delegitimize_mem_from_attrs (x);
4882 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4883 switch (XINT (XEXP (x, 1), 1))
4885 case UNSPEC_MOVE_PIC:
4886 case UNSPEC_TLSLE:
4887 x = XVECEXP (XEXP (x, 1), 0, 0);
4888 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4889 break;
4890 default:
4891 break;
4894 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4895 if (GET_CODE (x) == MINUS
4896 && REG_P (XEXP (x, 0))
4897 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4898 && GET_CODE (XEXP (x, 1)) == LO_SUM
4899 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4900 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4902 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4903 gcc_assert (GET_CODE (x) == LABEL_REF);
4906 return x;
4909 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4910 replace the input X, or the original X if no replacement is called for.
4911 The output parameter *WIN is 1 if the calling macro should goto WIN,
4912 0 if it should not.
4914 For SPARC, we wish to handle addresses by splitting them into
4915 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4916 This cuts the number of extra insns by one.
4918 Do nothing when generating PIC code and the address is a symbolic
4919 operand or requires a scratch register. */
4921 rtx
4922 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4923 int opnum, int type,
4924 int ind_levels ATTRIBUTE_UNUSED, int *win)
4926 /* Decompose SImode constants into HIGH+LO_SUM. */
4927 if (CONSTANT_P (x)
4928 && (mode != TFmode || TARGET_ARCH64)
4929 && GET_MODE (x) == SImode
4930 && GET_CODE (x) != LO_SUM
4931 && GET_CODE (x) != HIGH
4932 && sparc_cmodel <= CM_MEDLOW
4933 && !(flag_pic
4934 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4936 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4937 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4938 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4939 opnum, (enum reload_type)type);
4940 *win = 1;
4941 return x;
4944 /* We have to recognize what we have already generated above. */
4945 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4947 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4948 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4949 opnum, (enum reload_type)type);
4950 *win = 1;
4951 return x;
4954 *win = 0;
4955 return x;
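/* For instance, reloading the address of a static SYM for a DFmode load
   yields the HIGH+LO_SUM split described above, i.e. roughly

     sethi  %hi(sym), %g1
     ldd    [%g1 + %lo(sym)], %f0

   with the LO_SUM kept inside the memory reference, so no third insn is
   needed to form the full address.  */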
4958 /* Return true if ADDR (a legitimate address expression)
4959 has an effect that depends on the machine mode it is used for.
4961 In PIC mode,
4963 (mem:HI [%l7+a])
4965 is not equivalent to
4967 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4969 because [%l7+a+1] is interpreted as the address of (a+1). */
4972 static bool
4973 sparc_mode_dependent_address_p (const_rtx addr,
4974 addr_space_t as ATTRIBUTE_UNUSED)
4976 if (flag_pic && GET_CODE (addr) == PLUS)
4978 rtx op0 = XEXP (addr, 0);
4979 rtx op1 = XEXP (addr, 1);
4980 if (op0 == pic_offset_table_rtx
4981 && symbolic_operand (op1, VOIDmode))
4982 return true;
4985 return false;
4988 #ifdef HAVE_GAS_HIDDEN
4989 # define USE_HIDDEN_LINKONCE 1
4990 #else
4991 # define USE_HIDDEN_LINKONCE 0
4992 #endif
4994 static void
4995 get_pc_thunk_name (char name[32], unsigned int regno)
4997 const char *reg_name = reg_names[regno];
4999 /* Skip the leading '%' as that cannot be used in a
5000 symbol name. */
5001 reg_name += 1;
5003 if (USE_HIDDEN_LINKONCE)
5004 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
5005 else
5006 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
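/* E.g. for GLOBAL_OFFSET_TABLE_REGNUM == %l7 this yields the name
   "__sparc_get_pc_thunk.l7" when hidden link-once symbols are
   available, and an internal LADDPC label otherwise.  */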
5009 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
5011 static rtx
5012 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
5014 int orig_flag_pic = flag_pic;
5015 rtx insn;
5017 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
5018 flag_pic = 0;
5019 if (TARGET_ARCH64)
5020 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
5021 else
5022 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
5023 flag_pic = orig_flag_pic;
5025 return insn;
5028 /* Emit code to load the GOT register. */
5030 void
5031 load_got_register (void)
5033 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
5034 if (!global_offset_table_rtx)
5035 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
5037 if (TARGET_VXWORKS_RTP)
5038 emit_insn (gen_vxworks_load_got ());
5039 else
5041 /* The GOT symbol is subject to a PC-relative relocation so we need a
5042 helper function to add the PC value and thus get the final value. */
5043 if (!got_helper_rtx)
5045 char name[32];
5046 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
5047 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5050 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
5051 got_helper_rtx,
5052 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
5055 /* Need to emit this whether or not we obey regdecls,
5056 since setjmp/longjmp can cause life info to screw up.
5057 ??? In the case where we don't obey regdecls, this is not sufficient
5058 since we may not fall out the bottom. */
5059 emit_use (global_offset_table_rtx);
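/* In 32-bit PIC code the net effect is the familiar prologue sequence,
   roughly (the offsets account for the PC of each insn):

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the thunk adds the call's PC (%o7) into %l7.  */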
5062 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5063 address of the call target. */
5065 void
5066 sparc_emit_call_insn (rtx pat, rtx addr)
5068 rtx_insn *insn;
5070 insn = emit_call_insn (pat);
5072 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5073 if (TARGET_VXWORKS_RTP
5074 && flag_pic
5075 && GET_CODE (addr) == SYMBOL_REF
5076 && (SYMBOL_REF_DECL (addr)
5077 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5078 : !SYMBOL_REF_LOCAL_P (addr)))
5080 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5081 crtl->uses_pic_offset_table = 1;
5085 /* Return 1 if RTX is a MEM which is known to be aligned to at
5086 least a DESIRED byte boundary. */
5088 int
5089 mem_min_alignment (rtx mem, int desired)
5091 rtx addr, base, offset;
5093 /* If it's not a MEM we can't accept it. */
5094 if (GET_CODE (mem) != MEM)
5095 return 0;
5097 /* Obviously... */
5098 if (!TARGET_UNALIGNED_DOUBLES
5099 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5100 return 1;
5102 /* ??? The rest of the function predates MEM_ALIGN so
5103 there is probably a bit of redundancy. */
5104 addr = XEXP (mem, 0);
5105 base = offset = NULL_RTX;
5106 if (GET_CODE (addr) == PLUS)
5108 if (GET_CODE (XEXP (addr, 0)) == REG)
5110 base = XEXP (addr, 0);
5112 /* What we are saying here is that if the base
5113 REG is aligned properly, the compiler will make
5114 sure any REG based index upon it will be so
5115 as well. */
5116 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5117 offset = XEXP (addr, 1);
5118 else
5119 offset = const0_rtx;
5122 else if (GET_CODE (addr) == REG)
5124 base = addr;
5125 offset = const0_rtx;
5128 if (base != NULL_RTX)
5130 int regno = REGNO (base);
5132 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5134 /* Check if the compiler has recorded some information
5135 about the alignment of the base REG. If reload has
5136 completed, we already matched with proper alignments.
5137 If not running global_alloc, reload might give us an
5138 unaligned pointer to the local stack, though. */
5139 if (((cfun != 0
5140 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5141 || (optimize && reload_completed))
5142 && (INTVAL (offset) & (desired - 1)) == 0)
5143 return 1;
5145 else
5147 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5148 return 1;
5151 else if (! TARGET_UNALIGNED_DOUBLES
5152 || CONSTANT_P (addr)
5153 || GET_CODE (addr) == LO_SUM)
5155 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5156 is true, in which case we can only assume that an access is aligned if
5157 it is to a constant address, or the address involves a LO_SUM. */
5158 return 1;
5161 /* An obviously unaligned address. */
5162 return 0;
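/* This predicate is consulted by the move expanders and splitters to
   decide, for instance, whether a DImode or DFmode access on 32-bit
   can use a single ldd/std or must be split into two word accesses.  */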
5166 /* Vectors to keep interesting information about registers where it can easily
5167 be found. We used to use the actual mode value as the bit number, but there
5168 are more than 32 modes now. Instead we use two tables: one indexed by
5169 hard register number, and one indexed by mode. */
5171 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5172 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5173 mapped into one sparc_mode_class mode. */
5175 enum sparc_mode_class {
5176 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5177 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5178 CC_MODE, CCFP_MODE
5179 };
5181 /* Modes for single-word and smaller quantities. */
5182 #define S_MODES \
5183 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5185 /* Modes for double-word and smaller quantities. */
5186 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5188 /* Modes for quad-word and smaller quantities. */
5189 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5191 /* Modes for 8-word and smaller quantities. */
5192 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5194 /* Modes for single-float quantities. */
5195 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5197 /* Modes for double-float and smaller quantities. */
5198 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5200 /* Modes for quad-float and smaller quantities. */
5201 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5203 /* Modes for quad-float pairs and smaller quantities. */
5204 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5206 /* Modes for double-float only quantities. */
5207 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5209 /* Modes for quad-float and double-float only quantities. */
5210 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5212 /* Modes for quad-float pairs and double-float only quantities. */
5213 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5215 /* Modes for condition codes. */
5216 #define CC_MODES (1 << (int) CC_MODE)
5217 #define CCFP_MODES (1 << (int) CCFP_MODE)
5219 /* Value is 1 if register/mode pair is acceptable on sparc.
5221 The funny mixture of D and T modes is because integer operations
5222 do not specially operate on tetra quantities, so non-quad-aligned
5223 registers can hold quadword quantities (except %o4 and %i4 because
5224 they cross fixed registers).
5226 ??? Note that, despite the settings, non-double-aligned parameter
5227 registers can hold double-word quantities in 32-bit mode. */
5229 /* This points to either the 32-bit or the 64-bit version. */
5230 static const int *hard_regno_mode_classes;
5232 static const int hard_32bit_mode_classes[] = {
5233 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5234 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5235 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5236 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5238 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5239 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5240 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5241 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5243 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5244 and none can hold SFmode/SImode values. */
5245 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5246 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5247 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5248 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5250 /* %fcc[0123] */
5251 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5253 /* %icc, %sfp, %gsr */
5254 CC_MODES, 0, D_MODES
5255 };
5257 static const int hard_64bit_mode_classes[] = {
5258 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5259 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5260 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5261 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5263 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5264 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5265 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5266 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5268 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5269 and none can hold SFmode/SImode values. */
5270 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5271 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5272 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5273 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5275 /* %fcc[0123] */
5276 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5278 /* %icc, %sfp, %gsr */
5279 CC_MODES, 0, D_MODES
5280 };
5282 static int sparc_mode_class [NUM_MACHINE_MODES];
5284 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5286 static void
5287 sparc_init_modes (void)
5289 int i;
5291 for (i = 0; i < NUM_MACHINE_MODES; i++)
5293 machine_mode m = (machine_mode) i;
5294 unsigned int size = GET_MODE_SIZE (m);
5296 switch (GET_MODE_CLASS (m))
5298 case MODE_INT:
5299 case MODE_PARTIAL_INT:
5300 case MODE_COMPLEX_INT:
5301 if (size < 4)
5302 sparc_mode_class[i] = 1 << (int) H_MODE;
5303 else if (size == 4)
5304 sparc_mode_class[i] = 1 << (int) S_MODE;
5305 else if (size == 8)
5306 sparc_mode_class[i] = 1 << (int) D_MODE;
5307 else if (size == 16)
5308 sparc_mode_class[i] = 1 << (int) T_MODE;
5309 else if (size == 32)
5310 sparc_mode_class[i] = 1 << (int) O_MODE;
5311 else
5312 sparc_mode_class[i] = 0;
5313 break;
5314 case MODE_VECTOR_INT:
5315 if (size == 4)
5316 sparc_mode_class[i] = 1 << (int) SF_MODE;
5317 else if (size == 8)
5318 sparc_mode_class[i] = 1 << (int) DF_MODE;
5319 else
5320 sparc_mode_class[i] = 0;
5321 break;
5322 case MODE_FLOAT:
5323 case MODE_COMPLEX_FLOAT:
5324 if (size == 4)
5325 sparc_mode_class[i] = 1 << (int) SF_MODE;
5326 else if (size == 8)
5327 sparc_mode_class[i] = 1 << (int) DF_MODE;
5328 else if (size == 16)
5329 sparc_mode_class[i] = 1 << (int) TF_MODE;
5330 else if (size == 32)
5331 sparc_mode_class[i] = 1 << (int) OF_MODE;
5332 else
5333 sparc_mode_class[i] = 0;
5334 break;
5335 case MODE_CC:
5336 if (m == CCFPmode || m == CCFPEmode)
5337 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5338 else
5339 sparc_mode_class[i] = 1 << (int) CC_MODE;
5340 break;
5341 default:
5342 sparc_mode_class[i] = 0;
5343 break;
5347 if (TARGET_ARCH64)
5348 hard_regno_mode_classes = hard_64bit_mode_classes;
5349 else
5350 hard_regno_mode_classes = hard_32bit_mode_classes;
5352 /* Initialize the array used by REGNO_REG_CLASS. */
5353 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5355 if (i < 16 && TARGET_V8PLUS)
5356 sparc_regno_reg_class[i] = I64_REGS;
5357 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5358 sparc_regno_reg_class[i] = GENERAL_REGS;
5359 else if (i < 64)
5360 sparc_regno_reg_class[i] = FP_REGS;
5361 else if (i < 96)
5362 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5363 else if (i < 100)
5364 sparc_regno_reg_class[i] = FPCC_REGS;
5365 else
5366 sparc_regno_reg_class[i] = NO_REGS;
5370 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5372 static inline bool
5373 save_global_or_fp_reg_p (unsigned int regno,
5374 int leaf_function ATTRIBUTE_UNUSED)
5376 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5379 /* Return whether the return address register (%i7) is needed. */
5381 static inline bool
5382 return_addr_reg_needed_p (int leaf_function)
5384 /* If it is live, for example because of __builtin_return_address (0). */
5385 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5386 return true;
5388 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5389 if (!leaf_function
5390 /* Loading the GOT register clobbers %o7. */
5391 || crtl->uses_pic_offset_table
5392 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5393 return true;
5395 return false;
5398 /* Return whether REGNO, a local or in register, must be saved/restored. */
5400 static bool
5401 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5403 /* General case: call-saved registers live at some point. */
5404 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5405 return true;
5407 /* Frame pointer register (%fp) if needed. */
5408 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5409 return true;
5411 /* Return address register (%i7) if needed. */
5412 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5413 return true;
5415 /* GOT register (%l7) if needed. */
5416 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5417 return true;
5419 /* If the function accesses prior frames, the frame pointer and the return
5420 address of the previous frame must be saved on the stack. */
5421 if (crtl->accesses_prior_frames
5422 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5423 return true;
5425 return false;
5428 /* Compute the frame size required by the function. This function is called
5429 during the reload pass and also by sparc_expand_prologue. */
5431 HOST_WIDE_INT
5432 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5434 HOST_WIDE_INT frame_size, apparent_frame_size;
5435 int args_size, n_global_fp_regs = 0;
5436 bool save_local_in_regs_p = false;
5437 unsigned int i;
5439 /* If the function allocates dynamic stack space, the dynamic offset is
5440 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5441 if (leaf_function && !cfun->calls_alloca)
5442 args_size = 0;
5443 else
5444 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5446 /* Calculate space needed for global registers. */
5447 if (TARGET_ARCH64)
5449 for (i = 0; i < 8; i++)
5450 if (save_global_or_fp_reg_p (i, 0))
5451 n_global_fp_regs += 2;
5453 else
5455 for (i = 0; i < 8; i += 2)
5456 if (save_global_or_fp_reg_p (i, 0)
5457 || save_global_or_fp_reg_p (i + 1, 0))
5458 n_global_fp_regs += 2;
5461 /* In the flat window model, find out which local and in registers need to
5462 be saved. We don't reserve space in the current frame for them as they
5463 will be spilled into the register window save area of the caller's frame.
5464 However, as soon as we use this register window save area, we must create
5465 that of the current frame to make it the live one. */
5466 if (TARGET_FLAT)
5467 for (i = 16; i < 32; i++)
5468 if (save_local_or_in_reg_p (i, leaf_function))
5470 save_local_in_regs_p = true;
5471 break;
5474 /* Calculate space needed for FP registers. */
5475 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5476 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5477 n_global_fp_regs += 2;
5479 if (size == 0
5480 && n_global_fp_regs == 0
5481 && args_size == 0
5482 && !save_local_in_regs_p)
5483 frame_size = apparent_frame_size = 0;
5484 else
5486 /* Start from the apparent frame size. */
5487 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5489 /* We need to add the size of the outgoing argument area. */
5490 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5492 /* And that of the register window save area. */
5493 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5495 /* Finally, bump to the appropriate alignment. */
5496 frame_size = SPARC_STACK_ALIGN (frame_size);
5499 /* Set up values for use in prologue and epilogue. */
5500 sparc_frame_size = frame_size;
5501 sparc_apparent_frame_size = apparent_frame_size;
5502 sparc_n_global_fp_regs = n_global_fp_regs;
5503 sparc_save_local_in_regs_p = save_local_in_regs_p;
5505 return frame_size;
5508 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5510 int
5511 sparc_initial_elimination_offset (int to)
5513 int offset;
5515 if (to == STACK_POINTER_REGNUM)
5516 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5517 else
5518 offset = 0;
5520 offset += SPARC_STACK_BIAS;
5521 return offset;
5524 /* Output any necessary .register pseudo-ops. */
5526 void
5527 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5529 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5530 int i;
5532 if (TARGET_ARCH32)
5533 return;
5535 /* Check if %g[2367] were used without
5536 .register being printed for them already. */
5537 for (i = 2; i < 8; i++)
5539 if (df_regs_ever_live_p (i)
5540 && ! sparc_hard_reg_printed [i])
5542 sparc_hard_reg_printed [i] = 1;
5543 /* %g7 is used as the TLS base register; use #ignore
5544 for it instead of #scratch. */
5545 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5546 i == 7 ? "ignore" : "scratch");
5548 if (i == 3) i = 5;
5550 #endif
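/* Typical output for 64-bit code that clobbers %g2 and %g7:

     .register %g2, #scratch
     .register %g7, #ignore

   Note that the loop visits %g2, %g3, %g6 and %g7 only; %g4 and %g5
   are skipped by the 'if (i == 3) i = 5;' adjustment.  */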
5553 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5555 #if PROBE_INTERVAL > 4096
5556 #error Cannot use indexed addressing mode for stack probing
5557 #endif
5559 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5560 inclusive. These are offsets from the current stack pointer.
5562 Note that we don't use the REG+REG addressing mode for the probes because
5563 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5564 so the advantages of having a single code path win here. */
5566 static void
5567 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5569 rtx g1 = gen_rtx_REG (Pmode, 1);
5571 /* See if we have a constant small number of probes to generate. If so,
5572 that's the easy case. */
5573 if (size <= PROBE_INTERVAL)
5575 emit_move_insn (g1, GEN_INT (first));
5576 emit_insn (gen_rtx_SET (g1,
5577 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5578 emit_stack_probe (plus_constant (Pmode, g1, -size));
5581 /* The run-time loop is made up of 9 insns in the generic case while the
5582 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5583 else if (size <= 4 * PROBE_INTERVAL)
5585 HOST_WIDE_INT i;
5587 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5588 emit_insn (gen_rtx_SET (g1,
5589 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5590 emit_stack_probe (g1);
5592 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5593 it exceeds SIZE. If only two probes are needed, this will not
5594 generate any code. Then probe at FIRST + SIZE. */
5595 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5597 emit_insn (gen_rtx_SET (g1,
5598 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5599 emit_stack_probe (g1);
5602 emit_stack_probe (plus_constant (Pmode, g1,
5603 (i - PROBE_INTERVAL) - size));
5606 /* Otherwise, do the same as above, but in a loop. Note that we must be
5607 extra careful with variables wrapping around because we might be at
5608 the very top (or the very bottom) of the address space and we have
5609 to be able to handle this case properly; in particular, we use an
5610 equality test for the loop condition. */
5611 else
5613 HOST_WIDE_INT rounded_size;
5614 rtx g4 = gen_rtx_REG (Pmode, 4);
5616 emit_move_insn (g1, GEN_INT (first));
5619 /* Step 1: round SIZE to the previous multiple of the interval. */
5621 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5622 emit_move_insn (g4, GEN_INT (rounded_size));
5625 /* Step 2: compute initial and final value of the loop counter. */
5627 /* TEST_ADDR = SP + FIRST. */
5628 emit_insn (gen_rtx_SET (g1,
5629 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5631 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5632 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5635 /* Step 3: the loop
5637 while (TEST_ADDR != LAST_ADDR)
5639 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5640 probe at TEST_ADDR
5643 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5644 until it is equal to ROUNDED_SIZE. */
5646 if (TARGET_ARCH64)
5647 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5648 else
5649 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5652 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5653 that SIZE is equal to ROUNDED_SIZE. */
5655 if (size != rounded_size)
5656 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5659 /* Make sure nothing is scheduled before we are done. */
5660 emit_insn (gen_blockage ());
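/* Worked example, assuming PROBE_INTERVAL == 4096: for FIRST == 0 and
   SIZE == 12000, the unrolled branch above probes at SP - 4096 and
   SP - 8192 and finishes with a probe at SP - 12000.  Only when SIZE
   exceeds four intervals is the run-time loop emitted instead.  */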
5663 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5664 absolute addresses. */
5666 const char *
5667 output_probe_stack_range (rtx reg1, rtx reg2)
5669 static int labelno = 0;
5670 char loop_lab[32];
5671 rtx xops[2];
5673 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5675 /* Loop. */
5676 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5678 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5679 xops[0] = reg1;
5680 xops[1] = GEN_INT (-PROBE_INTERVAL);
5681 output_asm_insn ("add\t%0, %1, %0", xops);
5683 /* Test if TEST_ADDR == LAST_ADDR. */
5684 xops[1] = reg2;
5685 output_asm_insn ("cmp\t%0, %1", xops);
5687 /* Probe at TEST_ADDR and branch. */
5688 if (TARGET_ARCH64)
5689 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5690 else
5691 fputs ("\tbne\t", asm_out_file);
5692 assemble_name_raw (asm_out_file, loop_lab);
5693 fputc ('\n', asm_out_file);
5694 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5695 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5697 return "";
5700 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5701 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5702 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5703 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5704 the action to be performed if it returns false. Return the new offset. */
5706 typedef bool (*sorr_pred_t) (unsigned int, int);
5707 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5709 static int
5710 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5711 int offset, int leaf_function, sorr_pred_t save_p,
5712 sorr_act_t action_true, sorr_act_t action_false)
5714 unsigned int i;
5715 rtx mem;
5716 rtx_insn *insn;
5718 if (TARGET_ARCH64 && high <= 32)
5720 int fp_offset = -1;
5722 for (i = low; i < high; i++)
5724 if (save_p (i, leaf_function))
5726 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5727 base, offset));
5728 if (action_true == SORR_SAVE)
5730 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5731 RTX_FRAME_RELATED_P (insn) = 1;
5733 else /* action_true == SORR_RESTORE */
5735 /* The frame pointer must be restored last since its old
5736 value may be used as the base address for the frame. This
5737 is problematic in 64-bit mode only because of the lack
5738 of a double-word load instruction. */
5739 if (i == HARD_FRAME_POINTER_REGNUM)
5740 fp_offset = offset;
5741 else
5742 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5744 offset += 8;
5746 else if (action_false == SORR_ADVANCE)
5747 offset += 8;
5750 if (fp_offset >= 0)
5752 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5753 emit_move_insn (hard_frame_pointer_rtx, mem);
5756 else
5758 for (i = low; i < high; i += 2)
5760 bool reg0 = save_p (i, leaf_function);
5761 bool reg1 = save_p (i + 1, leaf_function);
5762 machine_mode mode;
5763 int regno;
5765 if (reg0 && reg1)
5767 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5768 regno = i;
5770 else if (reg0)
5772 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5773 regno = i;
5775 else if (reg1)
5777 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5778 regno = i + 1;
5779 offset += 4;
5781 else
5783 if (action_false == SORR_ADVANCE)
5784 offset += 8;
5785 continue;
5788 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5789 if (action_true == SORR_SAVE)
5791 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5792 RTX_FRAME_RELATED_P (insn) = 1;
5793 if (mode == DImode)
5795 rtx set1, set2;
5796 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5797 offset));
5798 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5799 RTX_FRAME_RELATED_P (set1) = 1;
5800 mem
5801 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5802 offset + 4));
5803 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5804 RTX_FRAME_RELATED_P (set2) = 1;
5805 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5806 gen_rtx_PARALLEL (VOIDmode,
5807 gen_rtvec (2, set1, set2)));
5810 else /* action_true == SORR_RESTORE */
5811 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5813 /* Bump and round down to double word
5814 in case we already bumped by 4. */
5815 offset = ROUND_DOWN (offset + 8, 8);
5819 return offset;
5822 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5824 static rtx
5825 emit_adjust_base_to_offset (rtx base, int offset)
5827 /* ??? This might be optimized a little as %g1 might already have a
5828 value close enough that a single add insn will do. */
5829 /* ??? Although, all of this is probably only a temporary fix because
5830 if %g1 can hold a function result, then sparc_expand_epilogue will
5831 lose (the result will be clobbered). */
5832 rtx new_base = gen_rtx_REG (Pmode, 1);
5833 emit_move_insn (new_base, GEN_INT (offset));
5834 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5835 return new_base;
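/* Note that SPARC load/store instructions take a 13-bit signed immediate
   displacement, i.e. offsets in [-4096, 4095].  The callers below check
   their offsets against this range and, when an offset does not fit, use
   the helper above to fold it into %g1 and then address with offset 0.  */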
5838 /* Emit code to save/restore call-saved global and FP registers. */
5840 static void
5841 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5843 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5845 base = emit_adjust_base_to_offset (base, offset);
5846 offset = 0;
5849 offset
5850 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5851 save_global_or_fp_reg_p, action, SORR_NONE);
5852 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5853 save_global_or_fp_reg_p, action, SORR_NONE);
5856 /* Emit code to save/restore call-saved local and in registers. */
5858 static void
5859 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5861 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5863 base = emit_adjust_base_to_offset (base, offset);
5864 offset = 0;
5867 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5868 save_local_or_in_reg_p, action, SORR_ADVANCE);
5871 /* Emit a window_save insn. */
5873 static rtx_insn *
5874 emit_window_save (rtx increment)
5876 rtx_insn *insn = emit_insn (gen_window_save (increment));
5877 RTX_FRAME_RELATED_P (insn) = 1;
5879 /* The incoming return address (%o7) is saved in %i7. */
5880 add_reg_note (insn, REG_CFA_REGISTER,
5881 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5882 gen_rtx_REG (Pmode,
5883 INCOMING_RETURN_ADDR_REGNUM)));
5885 /* The window save event. */
5886 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5888 /* The CFA is %fp, the hard frame pointer. */
5889 add_reg_note (insn, REG_CFA_DEF_CFA,
5890 plus_constant (Pmode, hard_frame_pointer_rtx,
5891 INCOMING_FRAME_SP_OFFSET));
5893 return insn;
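/* A window_save insn expands to a single 'save' instruction, which both
   rotates the register window and adjusts the stack pointer, e.g. for a
   192-byte frame:

	save	%sp, -192, %sp

   which is why a single insn can carry the CFA, window save and return
   address notes attached above.  */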
5896 /* Generate an increment for the stack pointer. */
5898 static rtx
5899 gen_stack_pointer_inc (rtx increment)
5901 return gen_rtx_SET (stack_pointer_rtx,
5902 gen_rtx_PLUS (Pmode,
5903 stack_pointer_rtx,
5904 increment));
5907 /* Expand the function prologue. The prologue is responsible for reserving
5908 storage for the frame, saving the call-saved registers and loading the
5909 GOT register if needed. */
5911 void
5912 sparc_expand_prologue (void)
5914 HOST_WIDE_INT size;
5915 rtx_insn *insn;
5917 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5918 on the final value of the flag means deferring the prologue/epilogue
5919 expansion until just before the second scheduling pass, which is too
5920 late to emit multiple epilogues or return insns.
5922 Of course we are making the assumption that the value of the flag
5923 will not change between now and its final value. Of the three parts
5924 of the formula, only the last one can reasonably vary. Let's take a
5925 closer look, after assuming that the first two are set to true
5926 (otherwise the last value is effectively silenced).
5928 If only_leaf_regs_used returns false, the global predicate will also
5929 be false so the actual frame size calculated below will be positive.
5930 As a consequence, the save_register_window insn will be emitted in
5931 the instruction stream; now this insn explicitly references %fp
5932 which is not a leaf register so only_leaf_regs_used will always
5933 return false subsequently.
5935 If only_leaf_regs_used returns true, we hope that the subsequent
5936 optimization passes won't cause non-leaf registers to pop up. For
5937 example, the regrename pass has special provisions to not rename to
5938 non-leaf registers in a leaf function. */
5939 sparc_leaf_function_p
5940 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5942 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5944 if (flag_stack_usage_info)
5945 current_function_static_stack_size = size;
5947 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
5948 || flag_stack_clash_protection)
5950 if (crtl->is_leaf && !cfun->calls_alloca)
5952 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
5953 sparc_emit_probe_stack_range (get_stack_check_protect (),
5954 size - get_stack_check_protect ());
5956 else if (size > 0)
5957 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
5960 if (size == 0)
5961 ; /* do nothing. */
5962 else if (sparc_leaf_function_p)
5964 rtx size_int_rtx = GEN_INT (-size);
5966 if (size <= 4096)
5967 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5968 else if (size <= 8192)
5970 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5971 RTX_FRAME_RELATED_P (insn) = 1;
5973 /* %sp is still the CFA register. */
5974 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5976 else
5978 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5979 emit_move_insn (size_rtx, size_int_rtx);
5980 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5981 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5982 gen_stack_pointer_inc (size_int_rtx));
5985 RTX_FRAME_RELATED_P (insn) = 1;
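/* The 4096/8192 special cases here and below stem from the 13-bit signed
   immediate field of the SPARC add instruction, which covers [-4096, 4095]:
   frames up to 4096 bytes need one increment, frames up to 8192 bytes two,
   and larger frames must first materialize the size in a scratch register.  */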
5987 else
5989 rtx size_int_rtx = GEN_INT (-size);
5991 if (size <= 4096)
5992 emit_window_save (size_int_rtx);
5993 else if (size <= 8192)
5995 emit_window_save (GEN_INT (-4096));
5997 /* %sp is not the CFA register anymore. */
5998 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6000 /* Make sure no %fp-based store is issued until after the frame is
6001 established. The offset between the frame pointer and the stack
6002 pointer is calculated relative to the value of the stack pointer
6003 at the end of the function prologue, and moving instructions that
6004 access the stack via the frame pointer between the instructions
6005 that decrement the stack pointer could result in accessing the
6006 register window save area, which is volatile. */
6007 emit_insn (gen_frame_blockage ());
6009 else
6011 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6012 emit_move_insn (size_rtx, size_int_rtx);
6013 emit_window_save (size_rtx);
6017 if (sparc_leaf_function_p)
6019 sparc_frame_base_reg = stack_pointer_rtx;
6020 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6022 else
6024 sparc_frame_base_reg = hard_frame_pointer_rtx;
6025 sparc_frame_base_offset = SPARC_STACK_BIAS;
6028 if (sparc_n_global_fp_regs > 0)
6029 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6030 sparc_frame_base_offset
6031 - sparc_apparent_frame_size,
6032 SORR_SAVE);
6034 /* Load the GOT register if needed. */
6035 if (crtl->uses_pic_offset_table)
6036 load_got_register ();
6038 /* Advertise that the data calculated just above are now valid. */
6039 sparc_prologue_data_valid_p = true;
6042 /* Expand the function prologue in the flat window model. The prologue is
6043 responsible for reserving storage for the frame, saving the call-saved
6044 registers and loading the GOT register if needed. */
6046 void
6047 sparc_flat_expand_prologue (void)
6049 HOST_WIDE_INT size;
6050 rtx_insn *insn;
6052 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6054 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6056 if (flag_stack_usage_info)
6057 current_function_static_stack_size = size;
6059 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6060 || flag_stack_clash_protection)
6062 if (crtl->is_leaf && !cfun->calls_alloca)
6064 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6065 sparc_emit_probe_stack_range (get_stack_check_protect (),
6066 size - get_stack_check_protect ());
6068 else if (size > 0)
6069 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6072 if (sparc_save_local_in_regs_p)
6073 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6074 SORR_SAVE);
6076 if (size == 0)
6077 ; /* do nothing. */
6078 else
6080 rtx size_int_rtx, size_rtx;
6082 size_rtx = size_int_rtx = GEN_INT (-size);
6084 /* We establish the frame (i.e. decrement the stack pointer) first, even
6085 if we use a frame pointer, because we cannot clobber any call-saved
6086 registers, including the frame pointer, if we haven't created a new
6087 register save area, for the sake of compatibility with the ABI. */
6088 if (size <= 4096)
6089 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6090 else if (size <= 8192 && !frame_pointer_needed)
6092 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6093 RTX_FRAME_RELATED_P (insn) = 1;
6094 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6096 else
6098 size_rtx = gen_rtx_REG (Pmode, 1);
6099 emit_move_insn (size_rtx, size_int_rtx);
6100 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6101 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6102 gen_stack_pointer_inc (size_int_rtx));
6104 RTX_FRAME_RELATED_P (insn) = 1;
6106 /* Ensure nothing is scheduled until after the frame is established. */
6107 emit_insn (gen_blockage ());
6109 if (frame_pointer_needed)
6111 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6112 gen_rtx_MINUS (Pmode,
6113 stack_pointer_rtx,
6114 size_rtx)));
6115 RTX_FRAME_RELATED_P (insn) = 1;
6117 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6118 gen_rtx_SET (hard_frame_pointer_rtx,
6119 plus_constant (Pmode, stack_pointer_rtx,
6120 size)));
6123 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6125 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6126 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6128 insn = emit_move_insn (i7, o7);
6129 RTX_FRAME_RELATED_P (insn) = 1;
6131 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6133 /* Prevent this instruction from ever being considered dead,
6134 even if this function has no epilogue. */
6135 emit_use (i7);
6139 if (frame_pointer_needed)
6141 sparc_frame_base_reg = hard_frame_pointer_rtx;
6142 sparc_frame_base_offset = SPARC_STACK_BIAS;
6144 else
6146 sparc_frame_base_reg = stack_pointer_rtx;
6147 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6150 if (sparc_n_global_fp_regs > 0)
6151 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6152 sparc_frame_base_offset
6153 - sparc_apparent_frame_size,
6154 SORR_SAVE);
6156 /* Load the GOT register if needed. */
6157 if (crtl->uses_pic_offset_table)
6158 load_got_register ();
6160 /* Advertise that the data calculated just above are now valid. */
6161 sparc_prologue_data_valid_p = true;
6164 /* This function generates the assembly code for function entry, which boils
6165 down to emitting the necessary .register directives. */
6167 static void
6168 sparc_asm_function_prologue (FILE *file)
6170 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6171 if (!TARGET_FLAT)
6172 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6174 sparc_output_scratch_registers (file);
6177 /* Expand the function epilogue, either normal or part of a sibcall.
6178 We emit all the instructions except the return or the call. */
6180 void
6181 sparc_expand_epilogue (bool for_eh)
6183 HOST_WIDE_INT size = sparc_frame_size;
6185 if (cfun->calls_alloca)
6186 emit_insn (gen_frame_blockage ());
6188 if (sparc_n_global_fp_regs > 0)
6189 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6190 sparc_frame_base_offset
6191 - sparc_apparent_frame_size,
6192 SORR_RESTORE);
6194 if (size == 0 || for_eh)
6195 ; /* do nothing. */
6196 else if (sparc_leaf_function_p)
6198 if (size <= 4096)
6199 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6200 else if (size <= 8192)
6202 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6203 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6205 else
6207 rtx reg = gen_rtx_REG (Pmode, 1);
6208 emit_move_insn (reg, GEN_INT (size));
6209 emit_insn (gen_stack_pointer_inc (reg));
6214 /* Expand the function epilogue in the flat window model, either normal or
6215 part of a sibcall. We emit all the instructions except the return or the call. */
6217 void
6218 sparc_flat_expand_epilogue (bool for_eh)
6220 HOST_WIDE_INT size = sparc_frame_size;
6222 if (sparc_n_global_fp_regs > 0)
6223 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6224 sparc_frame_base_offset
6225 - sparc_apparent_frame_size,
6226 SORR_RESTORE);
6228 /* If we have a frame pointer, we need both to restore it before the
6229 frame is destroyed and to use its current value in destroying the frame.
6230 Since we don't have an atomic way to do that in the flat window model,
6231 we save the current value into a temporary register (%g1). */
6232 if (frame_pointer_needed && !for_eh)
6233 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6235 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6236 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6237 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6239 if (sparc_save_local_in_regs_p)
6240 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6241 sparc_frame_base_offset,
6242 SORR_RESTORE);
6244 if (size == 0 || for_eh)
6245 ; /* do nothing. */
6246 else if (frame_pointer_needed)
6248 /* Make sure the frame is destroyed after everything else is done. */
6249 emit_insn (gen_blockage ());
6251 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6253 else
6255 /* Likewise. */
6256 emit_insn (gen_blockage ());
6258 if (size <= 4096)
6259 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6260 else if (size <= 8192)
6262 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6263 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6265 else
6267 rtx reg = gen_rtx_REG (Pmode, 1);
6268 emit_move_insn (reg, GEN_INT (size));
6269 emit_insn (gen_stack_pointer_inc (reg));
6274 /* Return true if it is appropriate to emit `return' instructions in the
6275 body of a function. */
6277 bool
6278 sparc_can_use_return_insn_p (void)
6280 return sparc_prologue_data_valid_p
6281 && sparc_n_global_fp_regs == 0
6282 && (TARGET_FLAT
6283 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6284 : (sparc_frame_size == 0 || !sparc_leaf_function_p));
6287 /* This function generates the assembly code for function exit. */
6289 static void
6290 sparc_asm_function_epilogue (FILE *file)
6292 /* If the last two instructions of a function are "call foo; dslot;"
6293 the return address might point to the first instruction in the next
6294 function and we have to output a dummy nop for the sake of sane
6295 backtraces in such cases. This is pointless for sibling calls since
6296 the return address is explicitly adjusted. */
6298 rtx_insn *insn = get_last_insn ();
6300 rtx last_real_insn = prev_real_insn (insn);
6301 if (last_real_insn
6302 && NONJUMP_INSN_P (last_real_insn)
6303 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6304 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6306 if (last_real_insn
6307 && CALL_P (last_real_insn)
6308 && !SIBLING_CALL_P (last_real_insn))
6309 fputs ("\tnop\n", file);
6311 sparc_output_deferred_case_vectors ();
6314 /* Output a 'restore' instruction. */
6316 static void
6317 output_restore (rtx pat)
6319 rtx operands[3];
6321 if (! pat)
6323 fputs ("\t restore\n", asm_out_file);
6324 return;
6327 gcc_assert (GET_CODE (pat) == SET);
6329 operands[0] = SET_DEST (pat);
6330 pat = SET_SRC (pat);
6332 switch (GET_CODE (pat))
6334 case PLUS:
6335 operands[1] = XEXP (pat, 0);
6336 operands[2] = XEXP (pat, 1);
6337 output_asm_insn (" restore %r1, %2, %Y0", operands);
6338 break;
6339 case LO_SUM:
6340 operands[1] = XEXP (pat, 0);
6341 operands[2] = XEXP (pat, 1);
6342 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6343 break;
6344 case ASHIFT:
6345 operands[1] = XEXP (pat, 0);
6346 gcc_assert (XEXP (pat, 1) == const1_rtx);
6347 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6348 break;
6349 default:
6350 operands[1] = pat;
6351 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6352 break;
6356 /* Output a return. */
6358 const char *
6359 output_return (rtx_insn *insn)
6361 if (crtl->calls_eh_return)
6363 /* If the function uses __builtin_eh_return, the eh_return
6364 machinery occupies the delay slot. */
6365 gcc_assert (!final_sequence);
6367 if (flag_delayed_branch)
6369 if (!TARGET_FLAT && TARGET_V9)
6370 fputs ("\treturn\t%i7+8\n", asm_out_file);
6371 else
6373 if (!TARGET_FLAT)
6374 fputs ("\trestore\n", asm_out_file);
6376 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6379 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6381 else
6383 if (!TARGET_FLAT)
6384 fputs ("\trestore\n", asm_out_file);
6386 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6387 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6390 else if (sparc_leaf_function_p || TARGET_FLAT)
6392 /* This is a leaf or flat function so we don't have to bother restoring
6393 the register window, which frees us from dealing with the convoluted
6394 semantics of restore/return. We simply output the jump to the
6395 return address and the insn in the delay slot (if any). */
6397 return "jmp\t%%o7+%)%#";
6399 else
6401 /* This is a regular function so we have to restore the register window.
6402 We may have a pending insn for the delay slot, which will be either
6403 combined with the 'restore' instruction or put in the delay slot of
6404 the 'return' instruction. */
6406 if (final_sequence)
6408 rtx_insn *delay;
6409 rtx pat;
6410 int seen;
6412 delay = NEXT_INSN (insn);
6413 gcc_assert (delay);
6415 pat = PATTERN (delay);
6417 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6419 epilogue_renumber (&pat, 0);
6420 return "return\t%%i7+%)%#";
6422 else
6424 output_asm_insn ("jmp\t%%i7+%)", NULL);
6426 /* We're going to output the insn in the delay slot manually.
6427 Make sure to output its source location first. */
6428 PATTERN (delay) = gen_blockage ();
6429 INSN_CODE (delay) = -1;
6430 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6431 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6433 output_restore (pat);
6436 else
6438 /* The delay slot is empty. */
6439 if (TARGET_V9)
6440 return "return\t%%i7+%)\n\t nop";
6441 else if (flag_delayed_branch)
6442 return "jmp\t%%i7+%)\n\t restore";
6443 else
6444 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6448 return "";
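/* In the templates returned above, the %) punctuation expands to the
   return offset (8, or 12 when the 32-bit caller is expected to follow
   the call with an 'unimp' word for an aggregate return) and %# emits
   a nop when the delay slot is empty.  */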
6451 /* Output a sibling call. */
6453 const char *
6454 output_sibcall (rtx_insn *insn, rtx call_operand)
6456 rtx operands[1];
6458 gcc_assert (flag_delayed_branch);
6460 operands[0] = call_operand;
6462 if (sparc_leaf_function_p || TARGET_FLAT)
6464 /* This is a leaf or flat function so we don't have to bother restoring
6465 the register window. We simply output the jump to the function and
6466 the insn in the delay slot (if any). */
6468 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6470 if (final_sequence)
6471 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6472 operands);
6473 else
6474 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6475 it into a branch if possible. */
6476 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6477 operands);
6479 else
6481 /* This is a regular function so we have to restore the register window.
6482 We may have a pending insn for the delay slot, which will be combined
6483 with the 'restore' instruction. */
6485 output_asm_insn ("call\t%a0, 0", operands);
6487 if (final_sequence)
6489 rtx_insn *delay;
6490 rtx pat;
6491 int seen;
6493 delay = NEXT_INSN (insn);
6494 gcc_assert (delay);
6496 pat = PATTERN (delay);
6498 /* We're going to output the insn in the delay slot manually.
6499 Make sure to output its source location first. */
6500 PATTERN (delay) = gen_blockage ();
6501 INSN_CODE (delay) = -1;
6502 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6503 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6505 output_restore (pat);
6507 else
6508 output_restore (NULL_RTX);
6511 return "";
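/* Illustrative sketch: for a leaf or flat function with an empty delay
   slot, the template above produces

	or	%o7, %g0, %g1
	call	target, 0
	 or	%g1, %g0, %o7

   i.e. the return address is parked in %g1 around the call so that the
   sibling callee returns directly to our own caller.  */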
6514 /* Functions for handling argument passing.
6516 For 32-bit, the first 6 args are normally in registers and the rest are
6517 pushed. Any arg that starts within the first 6 words is at least
6518 partially passed in a register unless its data type forbids.
6520 For 64-bit, the argument registers are laid out as an array of 16 elements
6521 and arguments are added sequentially. The first 6 int args and up to the
6522 first 16 fp args (depending on size) are passed in regs.
6524 Slot Stack Integral Float Float in structure Double Long Double
6525 ---- ----- -------- ----- ------------------ ------ -----------
6526 15 [SP+248] %f31 %f30,%f31 %d30
6527 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6528 13 [SP+232] %f27 %f26,%f27 %d26
6529 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6530 11 [SP+216] %f23 %f22,%f23 %d22
6531 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6532 9 [SP+200] %f19 %f18,%f19 %d18
6533 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6534 7 [SP+184] %f15 %f14,%f15 %d14
6535 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6536 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6537 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6538 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6539 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6540 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6541 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6543 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6545 Integral arguments are always passed as 64-bit quantities appropriately
6546 extended.
6548 Passing of floating point values is handled as follows.
6549 If a prototype is in scope:
6550 If the value is a named argument (i.e. the function is not stdarg, or the
6551 value is not part of the `...'), then the value is passed in the
6552 appropriate fp reg.
6553 If the value is part of the `...' and is passed in one of the first 6
6554 slots then the value is passed in the appropriate int reg.
6555 If the value is part of the `...' and is not passed in one of the first 6
6556 slots then the value is passed in memory.
6557 If a prototype is not in scope:
6558 If the value is one of the first 6 arguments the value is passed in the
6559 appropriate integer reg and the appropriate fp reg.
6560 If the value is not one of the first 6 arguments the value is passed in
6561 the appropriate fp reg and in memory.
6564 Summary of the calling conventions implemented by GCC on the SPARC:
6566 32-bit ABI:
6567 size argument return value
6569 small integer <4 int. reg. int. reg.
6570 word 4 int. reg. int. reg.
6571 double word 8 int. reg. int. reg.
6573 _Complex small integer <8 int. reg. int. reg.
6574 _Complex word 8 int. reg. int. reg.
6575 _Complex double word 16 memory int. reg.
6577 vector integer <=8 int. reg. FP reg.
6578 vector integer >8 memory memory
6580 float 4 int. reg. FP reg.
6581 double 8 int. reg. FP reg.
6582 long double 16 memory memory
6584 _Complex float 8 memory FP reg.
6585 _Complex double 16 memory FP reg.
6586 _Complex long double 32 memory FP reg.
6588 vector float any memory memory
6590 aggregate any memory memory
6594 64-bit ABI:
6595 size argument return value
6597 small integer <8 int. reg. int. reg.
6598 word 8 int. reg. int. reg.
6599 double word 16 int. reg. int. reg.
6601 _Complex small integer <16 int. reg. int. reg.
6602 _Complex word 16 int. reg. int. reg.
6603 _Complex double word 32 memory int. reg.
6605 vector integer <=16 FP reg. FP reg.
6606 vector integer 16<s<=32 memory FP reg.
6607 vector integer >32 memory memory
6609 float 4 FP reg. FP reg.
6610 double 8 FP reg. FP reg.
6611 long double 16 FP reg. FP reg.
6613 _Complex float 8 FP reg. FP reg.
6614 _Complex double 16 FP reg. FP reg.
6615 _Complex long double 32 memory FP reg.
6617 vector float <=16 FP reg. FP reg.
6618 vector float 16<s<=32 memory FP reg.
6619 vector float >32 memory memory
6621 aggregate <=16 reg. reg.
6622 aggregate 16<s<=32 memory reg.
6623 aggregate >32 memory memory
6627 Note #1: complex floating-point types follow the extended SPARC ABIs as
6628 implemented by the Sun compiler.
6630 Note #2: integral vector types follow the scalar floating-point types
6631 conventions to match what is implemented by the Sun VIS SDK.
6633 Note #3: floating-point vector types follow the aggregate types
6634 conventions. */
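/* Illustrative example of the 64-bit conventions tabulated above: a
   prototyped function

	double f (int n, double x, float y);

   receives N in %o0 (slot 0), X in %d2 (slot 1) and Y in %f5 (slot 2,
   right-justified in the odd half of the slot's double register), and
   returns its result in %d0.  */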
6637 /* Maximum number of int regs for args. */
6638 #define SPARC_INT_ARG_MAX 6
6639 /* Maximum number of fp regs for args. */
6640 #define SPARC_FP_ARG_MAX 16
6641 /* Number of words (partially) occupied for a given size in units. */
6642 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
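/* For example, CEIL_NWORDS (12) is 2 on a 64-bit target (UNITS_PER_WORD
   == 8) and 3 on a 32-bit target (UNITS_PER_WORD == 4).  */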
6644 /* Handle the INIT_CUMULATIVE_ARGS macro.
6645 Initialize a variable CUM of type CUMULATIVE_ARGS
6646 for a call to a function whose data type is FNTYPE.
6647 For a library call, FNTYPE is 0. */
6649 void
6650 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6652 cum->words = 0;
6653 cum->prototype_p = fntype && prototype_p (fntype);
6654 cum->libcall_p = !fntype;
6657 /* Handle promotion of pointer and integer arguments. */
6659 static machine_mode
6660 sparc_promote_function_mode (const_tree type, machine_mode mode,
6661 int *punsignedp, const_tree, int)
6663 if (type && POINTER_TYPE_P (type))
6665 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6666 return Pmode;
6669 /* Integral arguments are passed as full words, as per the ABI. */
6670 if (GET_MODE_CLASS (mode) == MODE_INT
6671 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6672 return word_mode;
6674 return mode;
6677 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6679 static bool
6680 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6682 return TARGET_ARCH64 ? true : false;
6685 /* Traverse the record TYPE recursively and call FUNC on its fields.
6686 NAMED is true if this is for a named parameter. DATA is passed
6687 to FUNC for each field. OFFSET is the starting position and
6688 PACKED is true if we are inside a packed record. */
6690 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6691 static void
6692 traverse_record_type (const_tree type, bool named, T *data,
6693 HOST_WIDE_INT offset = 0, bool packed = false)
6695 /* The ABI obviously doesn't specify how packed structures are passed.
6696 These are passed in integer regs if possible, otherwise memory. */
6697 if (!packed)
6698 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6699 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6701 packed = true;
6702 break;
6705 /* Walk the real fields, but skip those with no size or a zero size.
6706 ??? Fields with variable offset are handled as having zero offset. */
6707 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6708 if (TREE_CODE (field) == FIELD_DECL)
6710 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6711 continue;
6713 HOST_WIDE_INT bitpos = offset;
6714 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6715 bitpos += int_bit_position (field);
6717 tree field_type = TREE_TYPE (field);
6718 if (TREE_CODE (field_type) == RECORD_TYPE)
6719 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6720 packed);
6721 else
6723 const bool fp_type
6724 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6725 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6726 data);
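/* Illustrative example: for 'struct S { int i; double d; }' passed as a
   named parameter with the FPU enabled, the traversal above invokes
   Func (i, 0, false, data) and Func (d, 64, true, data); if S were
   packed, both invocations would get false for the FP flag and the
   record would be steered towards integer registers.  */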
6731 /* Handle recursive register classifying for structure layout. */
6733 typedef struct
6735 bool fp_regs; /* true if the field is eligible for FP registers. */
6736 bool fp_regs_in_first_word; /* true if such a field is in the first word. */
6737 } classify_data_t;
6739 /* A subroutine of function_arg_slotno. Classify the field. */
6741 inline void
6742 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6743 classify_data_t *data)
6745 if (fp)
6747 data->fp_regs = true;
6748 if (bitpos < BITS_PER_WORD)
6749 data->fp_regs_in_first_word = true;
6753 /* Compute the slot number to pass an argument in.
6754 Return the slot number or -1 if passing on the stack.
6756 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6757 the preceding args and about the function being called.
6758 MODE is the argument's machine mode.
6759 TYPE is the data type of the argument (as a tree).
6760 This is null for libcalls where that information may
6761 not be available.
6762 NAMED is nonzero if this argument is a named parameter
6763 (otherwise it is an extra parameter matching an ellipsis).
6764 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6765 *PREGNO records the register number to use if scalar type.
6766 *PPADDING records the amount of padding needed in words. */
6768 static int
6769 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6770 const_tree type, bool named, bool incoming,
6771 int *pregno, int *ppadding)
6773 int regbase = (incoming
6774 ? SPARC_INCOMING_INT_ARG_FIRST
6775 : SPARC_OUTGOING_INT_ARG_FIRST);
6776 int slotno = cum->words;
6777 enum mode_class mclass;
6778 int regno;
6780 *ppadding = 0;
6782 if (type && TREE_ADDRESSABLE (type))
6783 return -1;
6785 if (TARGET_ARCH32
6786 && mode == BLKmode
6787 && type
6788 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6789 return -1;
6791 /* For SPARC64, objects requiring 16-byte alignment get it. */
6792 if (TARGET_ARCH64
6793 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6794 && (slotno & 1) != 0)
6795 slotno++, *ppadding = 1;
6797 mclass = GET_MODE_CLASS (mode);
6798 if (type && TREE_CODE (type) == VECTOR_TYPE)
6800 /* Vector types deserve special treatment because they are
6801 polymorphic wrt their mode, depending upon whether VIS
6802 instructions are enabled. */
6803 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6805 /* The SPARC port defines no floating-point vector modes. */
6806 gcc_assert (mode == BLKmode);
6808 else
6810 /* Integral vector types should either have a vector
6811 mode or an integral mode, because we are guaranteed
6812 by pass_by_reference that their size is not greater
6813 than 16 bytes and TImode is 16-byte wide. */
6814 gcc_assert (mode != BLKmode);
6816 /* Vector integers are handled like floats according to
6817 the Sun VIS SDK. */
6818 mclass = MODE_FLOAT;
6822 switch (mclass)
6824 case MODE_FLOAT:
6825 case MODE_COMPLEX_FLOAT:
6826 case MODE_VECTOR_INT:
6827 if (TARGET_ARCH64 && TARGET_FPU && named)
6829 /* If all arg slots are filled, then must pass on stack. */
6830 if (slotno >= SPARC_FP_ARG_MAX)
6831 return -1;
6833 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6834 /* Arguments filling only a single FP register are
6835 right-justified in the outer double FP register. */
6836 if (GET_MODE_SIZE (mode) <= 4)
6837 regno++;
6838 break;
6840 /* fallthrough */
6842 case MODE_INT:
6843 case MODE_COMPLEX_INT:
6844 /* If all arg slots are filled, then must pass on stack. */
6845 if (slotno >= SPARC_INT_ARG_MAX)
6846 return -1;
6848 regno = regbase + slotno;
6849 break;
6851 case MODE_RANDOM:
6852 if (mode == VOIDmode)
6853 /* MODE is VOIDmode when generating the actual call. */
6854 return -1;
6856 gcc_assert (mode == BLKmode);
6858 if (TARGET_ARCH32
6859 || !type
6860 || (TREE_CODE (type) != RECORD_TYPE
6861 && TREE_CODE (type) != VECTOR_TYPE))
6863 /* If all arg slots are filled, then must pass on stack. */
6864 if (slotno >= SPARC_INT_ARG_MAX)
6865 return -1;
6867 regno = regbase + slotno;
6869 else /* TARGET_ARCH64 && type */
6871 /* If all arg slots are filled, then must pass on stack. */
6872 if (slotno >= SPARC_FP_ARG_MAX)
6873 return -1;
6875 if (TREE_CODE (type) == RECORD_TYPE)
6877 classify_data_t data = { false, false };
6878 traverse_record_type<classify_data_t, classify_registers>
6879 (type, named, &data);
6881 if (data.fp_regs)
6883 /* If all FP slots are filled except for the last one and
6884 there is no FP field in the first word, then must pass
6885 on stack. */
6886 if (slotno >= SPARC_FP_ARG_MAX - 1
6887 && !data.fp_regs_in_first_word)
6888 return -1;
6890 else
6892 /* If all int slots are filled, then must pass on stack. */
6893 if (slotno >= SPARC_INT_ARG_MAX)
6894 return -1;
6898 /* PREGNO isn't set since both int and FP regs can be used. */
6899 return slotno;
6901 break;
6903 default :
6904 gcc_unreachable ();
6907 *pregno = regno;
6908 return slotno;
6911 /* Handle recursive register counting/assigning for structure layout. */
6913 typedef struct
6915 int slotno; /* slot number of the argument. */
6916 int regbase; /* regno of the base register. */
6917 int intoffset; /* offset of the first pending integer field. */
6918 int nregs; /* number of words passed in registers. */
6919 bool stack; /* true if part of the argument is on the stack. */
6920 rtx ret; /* return expression being built. */
6921 } assign_data_t;
6923 /* A subroutine of function_arg_record_value. Compute the number of integer
6924 registers to be assigned between PARMS->intoffset and BITPOS. Return
6925 true if at least one integer register is assigned or false otherwise. */
6927 static bool
6928 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6930 if (data->intoffset < 0)
6931 return false;
6933 const int intoffset = data->intoffset;
6934 data->intoffset = -1;
6936 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6937 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6938 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6939 int nregs = (endbit - startbit) / BITS_PER_WORD;
6941 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6943 nregs = SPARC_INT_ARG_MAX - this_slotno;
6945 /* We need to pass this field (partly) on the stack. */
6946 data->stack = 1;
6949 if (nregs <= 0)
6950 return false;
6952 *pnregs = nregs;
6953 return true;
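/* Worked example: with data->intoffset == 0 and BITPOS == 128, startbit
   is 0, endbit is 128 and nregs is 2, i.e. the pending integer fields
   occupy two consecutive integer argument registers, provided the slots
   do not run past SPARC_INT_ARG_MAX.  */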
6956 /* A subroutine of function_arg_record_value. Compute the number and the mode
6957 of the FP registers to be assigned for FIELD. Return true if at least one
6958 FP register is assigned or false otherwise. */
6960 static bool
6961 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6962 assign_data_t *data,
6963 int *pnregs, machine_mode *pmode)
6965 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6966 machine_mode mode = DECL_MODE (field);
6967 int nregs, nslots;
6969 /* Slots are counted as words while regs are counted as having the size of
6970 the (inner) mode. */
6971 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6973 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6974 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6976 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6978 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6979 nregs = 2;
6981 else
6982 nregs = 1;
6984 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6986 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6988 nslots = SPARC_FP_ARG_MAX - this_slotno;
6989 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6991 /* We need to pass this field (partly) on the stack. */
6992 data->stack = 1;
6994 if (nregs <= 0)
6995 return false;
6998 *pnregs = nregs;
6999 *pmode = mode;
7000 return true;
7003 /* A subroutine of function_arg_record_value. Count the number of registers
7004 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7006 inline void
7007 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7008 assign_data_t *data)
7010 if (fp)
7012 int nregs;
7013 machine_mode mode;
7015 if (compute_int_layout (bitpos, data, &nregs))
7016 data->nregs += nregs;
7018 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7019 data->nregs += nregs;
7021 else
7023 if (data->intoffset < 0)
7024 data->intoffset = bitpos;
7028 /* A subroutine of function_arg_record_value. Assign the bits of the
7029 structure between PARMS->intoffset and BITPOS to integer registers. */
7031 static void
7032 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
7034 int intoffset = data->intoffset;
7035 machine_mode mode;
7036 int nregs;
7038 if (!compute_int_layout (bitpos, data, &nregs))
7039 return;
7041 /* If this is the trailing part of a word, only load that much into
7042 the register. Otherwise load the whole register. Note that in
7043 the latter case we may pick up unwanted bits. It's not a problem
7044 at the moment but we may wish to revisit this. */
7045 if (intoffset % BITS_PER_WORD != 0)
7046 mode = smallest_int_mode_for_size (BITS_PER_WORD
7047 - intoffset % BITS_PER_WORD);
7048 else
7049 mode = word_mode;
7051 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7052 unsigned int regno = data->regbase + this_slotno;
7053 intoffset /= BITS_PER_UNIT;
7055 do
7057 rtx reg = gen_rtx_REG (mode, regno);
7058 XVECEXP (data->ret, 0, data->stack + data->nregs)
7059 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7060 data->nregs += 1;
7061 mode = word_mode;
7062 regno += 1;
7063 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7065 while (--nregs > 0);
7068 /* A subroutine of function_arg_record_value. Assign FIELD at position
7069 BITPOS to FP registers. */
7071 static void
7072 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
7073 assign_data_t *data)
7075 int nregs;
7076 machine_mode mode;
7078 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7079 return;
7081 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7082 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7083 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7084 regno++;
7085 int pos = bitpos / BITS_PER_UNIT;
7087 do
7089 rtx reg = gen_rtx_REG (mode, regno);
7090 XVECEXP (data->ret, 0, data->stack + data->nregs)
7091 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7092 data->nregs += 1;
7093 regno += GET_MODE_SIZE (mode) / 4;
7094 pos += GET_MODE_SIZE (mode);
7096 while (--nregs > 0);
7099 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7100 the structure between PARMS->intoffset and BITPOS to registers. */
7102 inline void
7103 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
7104 assign_data_t *data)
7106 if (fp)
7108 assign_int_registers (bitpos, data);
7110 assign_fp_registers (field, bitpos, data);
7112 else
7114 if (data->intoffset < 0)
7115 data->intoffset = bitpos;
7119 /* Used by function_arg and sparc_function_value_1 to implement the complex
7120 conventions of the 64-bit ABI for passing and returning structures.
7121 Return an expression valid as a return value for the FUNCTION_ARG
7122 and TARGET_FUNCTION_VALUE.
7124 TYPE is the data type of the argument (as a tree).
7125 This is null for libcalls where that information may
7126 not be available.
7127 MODE is the argument's machine mode.
7128 SLOTNO is the index number of the argument's slot in the parameter array.
7129 NAMED is true if this argument is a named parameter
7130 (otherwise it is an extra parameter matching an ellipsis).
7131 REGBASE is the regno of the base register for the parameter array. */
7133 static rtx
7134 function_arg_record_value (const_tree type, machine_mode mode,
7135 int slotno, bool named, int regbase)
7137 HOST_WIDE_INT typesize = int_size_in_bytes (type);
7138 assign_data_t data;
7139 int nregs;
7141 data.slotno = slotno;
7142 data.regbase = regbase;
7144 /* Count how many registers we need. */
7145 data.nregs = 0;
7146 data.intoffset = 0;
7147 data.stack = false;
7148 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7150 /* Take into account pending integer fields. */
7151 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
7152 data.nregs += nregs;
7154 /* Allocate the vector and handle some annoying special cases. */
7155 nregs = data.nregs;
7157 if (nregs == 0)
7159 /* ??? Empty structure has no value? Duh? */
7160 if (typesize <= 0)
7162 /* Though there's nothing really to store, return a word register
7163 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7164 leads to breakage because there are zero bytes to load. */
7166 return gen_rtx_REG (mode, regbase);
7169 /* ??? C++ has structures with no fields, and yet a size. Give up
7170 for now and pass everything back in integer registers. */
7171 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
7172 if (nregs + slotno > SPARC_INT_ARG_MAX)
7173 nregs = SPARC_INT_ARG_MAX - slotno;
7176 gcc_assert (nregs > 0);
7178 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7180 /* If at least one field must be passed on the stack, generate
7181 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7182 also be passed on the stack. We can't do much better because the
7183 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7184 of structures for which the fields passed exclusively in registers
7185 are not at the beginning of the structure. */
7186 if (data.stack)
7187 XVECEXP (data.ret, 0, 0)
7188 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7190 /* Assign the registers. */
7191 data.nregs = 0;
7192 data.intoffset = 0;
7193 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7195 /* Assign pending integer fields. */
7196 assign_int_registers (typesize * BITS_PER_UNIT, &data);
7198 gcc_assert (data.nregs == nregs);
7200 return data.ret;
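/* Illustrative example: for a named argument of type
   'struct S { double d; long l; }' (16 bytes) in slot 0, the function
   above builds

	(parallel [(expr_list (reg:DF %f0) (const_int 0))
		   (expr_list (reg:DI %o1) (const_int 8))])

   i.e. the FP field is assigned to the first FP argument register and
   the pending integer field to the second integer argument register.  */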
7203 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7204 of the 64-bit ABI for passing and returning unions.
7205 Return an expression valid as a return value for the FUNCTION_ARG
7206 and TARGET_FUNCTION_VALUE.
7208 SIZE is the size in bytes of the union.
7209 MODE is the argument's machine mode.
7210 REGNO is the hard register the union will be passed in. */
7212 static rtx
7213 function_arg_union_value (int size, machine_mode mode, int slotno,
7214 int regno)
7216 int nwords = CEIL_NWORDS (size), i;
7217 rtx regs;
7219 /* See comment in previous function for empty structures. */
7220 if (nwords == 0)
7221 return gen_rtx_REG (mode, regno);
7223 if (slotno == SPARC_INT_ARG_MAX - 1)
7224 nwords = 1;
7226 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7228 for (i = 0; i < nwords; i++)
7230 /* Unions are passed left-justified. */
7231 XVECEXP (regs, 0, i)
7232 = gen_rtx_EXPR_LIST (VOIDmode,
7233 gen_rtx_REG (word_mode, regno),
7234 GEN_INT (UNITS_PER_WORD * i));
7235 regno++;
7238 return regs;
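/* Illustrative example: a 12-byte union starting in slot 0 gives
   nwords == 2 in 64-bit mode, so it is passed left-justified as

	(parallel [(expr_list (reg:DI %o0) (const_int 0))
		   (expr_list (reg:DI %o1) (const_int 8))]).  */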
7241 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7242 for passing and returning BLKmode vectors.
7243 Return an expression valid as a return value for the FUNCTION_ARG
7244 and TARGET_FUNCTION_VALUE.
7246 SIZE is the size in bytes of the vector.
7247 REGNO is the FP hard register the vector will be passed in. */
7249 static rtx
7250 function_arg_vector_value (int size, int regno)
7252 const int nregs = MAX (1, size / 8);
7253 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7255 if (size < 8)
7256 XVECEXP (regs, 0, 0)
7257 = gen_rtx_EXPR_LIST (VOIDmode,
7258 gen_rtx_REG (SImode, regno),
7259 const0_rtx);
7260 else
7261 for (int i = 0; i < nregs; i++)
7262 XVECEXP (regs, 0, i)
7263 = gen_rtx_EXPR_LIST (VOIDmode,
7264 gen_rtx_REG (DImode, regno + 2*i),
7265 GEN_INT (i*8));
7267 return regs;
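/* Illustrative example: a 16-byte vector passed in FP registers starting
   at %f0 gives nregs == 2 and

	(parallel [(expr_list (reg:DI %f0) (const_int 0))
		   (expr_list (reg:DI %f2) (const_int 8))])

   while a 4-byte vector is passed as a single (reg:SI %f0) piece.  */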
7270 /* Determine where to put an argument to a function.
7271 Value is zero to push the argument on the stack,
7272 or a hard register in which to store the argument.
7274 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7275 the preceding args and about the function being called.
7276 MODE is the argument's machine mode.
7277 TYPE is the data type of the argument (as a tree).
7278 This is null for libcalls where that information may
7279 not be available.
7280 NAMED is true if this argument is a named parameter
7281 (otherwise it is an extra parameter matching an ellipsis).
7282 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7283 TARGET_FUNCTION_INCOMING_ARG. */
7285 static rtx
7286 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7287 const_tree type, bool named, bool incoming)
7289 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7291 int regbase = (incoming
7292 ? SPARC_INCOMING_INT_ARG_FIRST
7293 : SPARC_OUTGOING_INT_ARG_FIRST);
7294 int slotno, regno, padding;
7295 enum mode_class mclass = GET_MODE_CLASS (mode);
7297 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7298 &regno, &padding);
7299 if (slotno == -1)
7300 return 0;
7302 /* Vector types deserve special treatment because they are polymorphic wrt
7303 their mode, depending upon whether VIS instructions are enabled. */
7304 if (type && TREE_CODE (type) == VECTOR_TYPE)
7306 HOST_WIDE_INT size = int_size_in_bytes (type);
7307 gcc_assert ((TARGET_ARCH32 && size <= 8)
7308 || (TARGET_ARCH64 && size <= 16));
7310 if (mode == BLKmode)
7311 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7313 mclass = MODE_FLOAT;
7316 if (TARGET_ARCH32)
7317 return gen_rtx_REG (mode, regno);
7319 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7320 and are promoted to registers if possible. */
7321 if (type && TREE_CODE (type) == RECORD_TYPE)
7323 HOST_WIDE_INT size = int_size_in_bytes (type);
7324 gcc_assert (size <= 16);
7326 return function_arg_record_value (type, mode, slotno, named, regbase);
7329 /* Unions up to 16 bytes in size are passed in integer registers. */
7330 else if (type && TREE_CODE (type) == UNION_TYPE)
7332 HOST_WIDE_INT size = int_size_in_bytes (type);
7333 gcc_assert (size <= 16);
7335 return function_arg_union_value (size, mode, slotno, regno);
7338 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7339 but also have the slot allocated for them.
7340 If no prototype is in scope fp values in register slots get passed
7341 in two places, either fp regs and int regs or fp regs and memory. */
7342 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7343 && SPARC_FP_REG_P (regno))
7345 rtx reg = gen_rtx_REG (mode, regno);
7346 if (cum->prototype_p || cum->libcall_p)
7347 return reg;
7348 else
7350 rtx v0, v1;
7352 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7354 int intreg;
7356 /* On incoming, we don't need to know that the value
7357 is passed in %f0 and %i0, and it confuses other parts
7358 causing needless spillage even in the simplest cases. */
7359 if (incoming)
7360 return reg;
7362 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7363 + (regno - SPARC_FP_ARG_FIRST) / 2);
7365 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7366 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7367 const0_rtx);
7368 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7370 else
7372 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7373 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7374 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7379 /* All other aggregate types are passed in an integer register in a mode
7380 corresponding to the size of the type. */
7381 else if (type && AGGREGATE_TYPE_P (type))
7383 HOST_WIDE_INT size = int_size_in_bytes (type);
7384 gcc_assert (size <= 16);
7386 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7389 return gen_rtx_REG (mode, regno);
7392 /* Handle the TARGET_FUNCTION_ARG target hook. */
7394 static rtx
7395 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7396 const_tree type, bool named)
7398 return sparc_function_arg_1 (cum, mode, type, named, false);
7401 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7403 static rtx
7404 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7405 const_tree type, bool named)
7407 return sparc_function_arg_1 (cum, mode, type, named, true);
7410 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7412 static unsigned int
7413 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7415 return ((TARGET_ARCH64
7416 && (GET_MODE_ALIGNMENT (mode) == 128
7417 || (type && TYPE_ALIGN (type) == 128)))
7418 ? 128
7419 : PARM_BOUNDARY);
7422 /* For an arg passed partly in registers and partly in memory,
7423 this is the number of bytes of registers used.
7424 For args passed entirely in registers or entirely in memory, zero.
7426 Any arg that starts in the first 6 regs but won't entirely fit in them
7427 needs partial registers on v8. On v9, structures with integer
7428 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7429 values that begin in the last fp reg [where "last fp reg" varies with the
7430 mode] will be split between that reg and memory. */
7432 static int
7433 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7434 tree type, bool named)
7436 int slotno, regno, padding;
7438 /* We pass false for incoming here; it doesn't matter. */
7439 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7440 false, &regno, &padding);
7442 if (slotno == -1)
7443 return 0;
7445 if (TARGET_ARCH32)
7447 if ((slotno + (mode == BLKmode
7448 ? CEIL_NWORDS (int_size_in_bytes (type))
7449 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7450 > SPARC_INT_ARG_MAX)
7451 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7453 else
7455 /* We are guaranteed by pass_by_reference that the size of the
7456 argument is not greater than 16 bytes, so we only need to return
7457 one word if the argument is partially passed in registers. */
7459 if (type && AGGREGATE_TYPE_P (type))
7461 int size = int_size_in_bytes (type);
7463 if (size > UNITS_PER_WORD
7464 && (slotno == SPARC_INT_ARG_MAX - 1
7465 || slotno == SPARC_FP_ARG_MAX - 1))
7466 return UNITS_PER_WORD;
7468 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7469 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7470 && ! (TARGET_FPU && named)))
7472 /* The complex types are passed as packed types. */
7473 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7474 && slotno == SPARC_INT_ARG_MAX - 1)
7475 return UNITS_PER_WORD;
7477 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7479 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7480 > SPARC_FP_ARG_MAX)
7481 return UNITS_PER_WORD;
7485 return 0;
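/* Worked example: on a 32-bit target, a DFmode argument starting in
   slot 5 needs CEIL_NWORDS (8) == 2 slots, so 5 + 2 > SPARC_INT_ARG_MAX
   and the function returns (6 - 5) * UNITS_PER_WORD == 4 bytes: half of
   the argument goes in %o5 and the other half on the stack.  */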
7488 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7489 Specify whether to pass the argument by reference. */
7491 static bool
7492 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7493 machine_mode mode, const_tree type,
7494 bool named ATTRIBUTE_UNUSED)
7496 if (TARGET_ARCH32)
7497 /* Original SPARC 32-bit ABI says that structures and unions,
7498 and quad-precision floats are passed by reference. For Pascal,
7499 also pass arrays by reference. All other base types are passed
7500 in registers.
7502 Extended ABI (as implemented by the Sun compiler) says that all
7503 complex floats are passed by reference. Pass complex integers
7504 in registers up to 8 bytes. More generally, enforce the 2-word
7505 cap for passing arguments in registers.
7507 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7508 integers are passed like floats of the same size, that is in
7509 registers up to 8 bytes. Pass all vector floats by reference
7510 like structure and unions. */
7511 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7512 || mode == SCmode
7513 /* Catch CDImode, TFmode, DCmode and TCmode. */
7514 || GET_MODE_SIZE (mode) > 8
7515 || (type
7516 && TREE_CODE (type) == VECTOR_TYPE
7517 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7518 else
7519 /* Original SPARC 64-bit ABI says that structures and unions
7520 smaller than 16 bytes are passed in registers, as well as
7521 all other base types.
7523 Extended ABI (as implemented by the Sun compiler) says that
7524 complex floats are passed in registers up to 16 bytes. Pass
7525 all complex integers in registers up to 16 bytes. More generally,
7526 enforce the 2-word cap for passing arguments in registers.
7528 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7529 integers are passed like floats of the same size, that is in
7530 registers (up to 16 bytes). Pass all vector floats like structure
7531 and unions. */
7532 return ((type
7533 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7534 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7535 /* Catch CTImode and TCmode. */
7536 || GET_MODE_SIZE (mode) > 16);
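/* For example, in 32-bit mode 'long double' (TFmode, 16 bytes) and
   '_Complex double' (DCmode, 16 bytes) are passed by reference, while
   in 64-bit mode a 32-byte structure is passed by reference but a
   16-byte one is not.  */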
7539 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7540 Update the data in CUM to advance over an argument
7541 of mode MODE and data type TYPE.
7542 TYPE is null for libcalls where that information may not be available. */
7544 static void
7545 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7546 const_tree type, bool named)
7548 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7549 int regno, padding;
7551 /* We pass false for incoming here; it doesn't matter. */
7552 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7554 /* If argument requires leading padding, add it. */
7555 cum->words += padding;
7557 if (TARGET_ARCH32)
7558 cum->words += (mode == BLKmode
7559 ? CEIL_NWORDS (int_size_in_bytes (type))
7560 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7561 else
7563 if (type && AGGREGATE_TYPE_P (type))
7565 int size = int_size_in_bytes (type);
7567 if (size <= 8)
7568 ++cum->words;
7569 else if (size <= 16)
7570 cum->words += 2;
7571 else /* passed by reference */
7572 ++cum->words;
7574 else
7575 cum->words += (mode == BLKmode
7576 ? CEIL_NWORDS (int_size_in_bytes (type))
7577 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7581 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI, structs
7582 are always stored left-shifted in their argument slot. */
7584 static pad_direction
7585 sparc_function_arg_padding (machine_mode mode, const_tree type)
7587 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7588 return PAD_UPWARD;
7590 /* Fall back to the default. */
7591 return default_function_arg_padding (mode, type);
7594 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7595 Specify whether to return the return value in memory. */
7597 static bool
7598 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7600 if (TARGET_ARCH32)
7601 /* Original SPARC 32-bit ABI says that structures and unions,
7602 and quad-precision floats are returned in memory. All other
7603 base types are returned in registers.
7605 Extended ABI (as implemented by the Sun compiler) says that
7606 all complex floats are returned in registers (8 FP registers
7607 at most for '_Complex long double'). Return all complex integers
7608 in registers (4 at most for '_Complex long long').
7610 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7611 integers are returned like floats of the same size, that is in
7612 registers up to 8 bytes and in memory otherwise. Return all
7613 vector floats in memory like structure and unions; note that
7614 they always have BLKmode like the latter. */
7615 return (TYPE_MODE (type) == BLKmode
7616 || TYPE_MODE (type) == TFmode
7617 || (TREE_CODE (type) == VECTOR_TYPE
7618 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7619 else
7620 /* Original SPARC 64-bit ABI says that structures and unions
7621 smaller than 32 bytes are returned in registers, as well as
7622 all other base types.
7624 Extended ABI (as implemented by the Sun compiler) says that all
7625 complex floats are returned in registers (8 FP registers at most
7626 for '_Complex long double'). Return all complex integers in
7627 registers (4 at most for '_Complex TItype').
7629 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7630 integers are returned like floats of the same size, that is in
7631 registers. Return all vector floats like structure and unions;
7632 note that they always have BLKmode like the latter. */
7633 return (TYPE_MODE (type) == BLKmode
7634 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7637 /* Handle the TARGET_STRUCT_VALUE target hook.
7638 Return where to find the structure return value address. */
7640 static rtx
7641 sparc_struct_value_rtx (tree fndecl, int incoming)
7643 if (TARGET_ARCH64)
7644 return 0;
7645 else
7647 rtx mem;
7649 if (incoming)
7650 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7651 STRUCT_VALUE_OFFSET));
7652 else
7653 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7654 STRUCT_VALUE_OFFSET));
7656 /* Only follow the SPARC ABI for fixed-size structure returns.
7657 Variable-size structure returns are handled per the normal
7658 procedures in GCC. This is enabled by -mstd-struct-return. */
7659 if (incoming == 2
7660 && sparc_std_struct_return
7661 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7662 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7664 /* We must check and adjust the return address, since it is
7665 optional whether the caller actually provides the return object. */
7666 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7667 rtx scratch = gen_reg_rtx (SImode);
7668 rtx_code_label *endlab = gen_label_rtx ();
7670 /* Calculate the return object size. */
7671 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7672 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7673 /* Construct a temporary return value. */
7674 rtx temp_val
7675 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7677 /* Implement SPARC 32-bit psABI callee return struct checking:
7679 Fetch the instruction where we will return to and see if
7680 it's an unimp instruction (the most significant 10 bits
7681 will be zero). */
7682 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7683 plus_constant (Pmode,
7684 ret_reg, 8)));
7685 /* Assume the size is valid and pre-adjust. */
7686 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7687 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7688 0, endlab);
7689 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7690 /* Write the address of the memory pointed to by temp_val into
7691 the memory pointed to by mem. */
7692 emit_move_insn (mem, XEXP (temp_val, 0));
7693 emit_label (endlab);
7696 return mem;
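/* Illustrative caller-side sequence (editorial sketch, hypothetical
   label and size): for a function returning a 12-byte struct, the
   32-bit psABI caller emits

       call    foo
        nop
       unimp   12          ! low bits encode the size of the object

   The code above fetches the word at the return address + 8, checks
   it against the expected size, and either keeps the +4 adjustment
   (so the return skips the unimp) or redirects the return slot to a
   harmless local temporary.  */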
7700 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7701 For v9, function return values are subject to the same rules as arguments,
7702 except that up to 32 bytes may be returned in registers. */
7704 static rtx
7705 sparc_function_value_1 (const_tree type, machine_mode mode,
7706 bool outgoing)
7708 /* Beware that the two values are swapped here wrt function_arg. */
7709 int regbase = (outgoing
7710 ? SPARC_INCOMING_INT_ARG_FIRST
7711 : SPARC_OUTGOING_INT_ARG_FIRST);
7712 enum mode_class mclass = GET_MODE_CLASS (mode);
7713 int regno;
7715 /* Vector types deserve special treatment because they are polymorphic wrt
7716 their mode, depending upon whether VIS instructions are enabled. */
7717 if (type && TREE_CODE (type) == VECTOR_TYPE)
7719 HOST_WIDE_INT size = int_size_in_bytes (type);
7720 gcc_assert ((TARGET_ARCH32 && size <= 8)
7721 || (TARGET_ARCH64 && size <= 32));
7723 if (mode == BLKmode)
7724 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7726 mclass = MODE_FLOAT;
7729 if (TARGET_ARCH64 && type)
7731 /* Structures up to 32 bytes in size are returned in registers. */
7732 if (TREE_CODE (type) == RECORD_TYPE)
7734 HOST_WIDE_INT size = int_size_in_bytes (type);
7735 gcc_assert (size <= 32);
7737 return function_arg_record_value (type, mode, 0, 1, regbase);
7740 /* Unions up to 32 bytes in size are returned in integer registers. */
7741 else if (TREE_CODE (type) == UNION_TYPE)
7743 HOST_WIDE_INT size = int_size_in_bytes (type);
7744 gcc_assert (size <= 32);
7746 return function_arg_union_value (size, mode, 0, regbase);
7749 /* Objects that require it are returned in FP registers. */
7750 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7753 /* All other aggregate types are returned in an integer register in a
7754 mode corresponding to the size of the type. */
7755 else if (AGGREGATE_TYPE_P (type))
7759 HOST_WIDE_INT size = int_size_in_bytes (type);
7760 gcc_assert (size <= 32);
7762 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7764 /* ??? We probably should have made the same ABI change in
7765 3.4.0 as the one we made for unions. The latter was
7766 required by the SCD though, while the former is not
7767 specified, so we favored compatibility and efficiency.
7769 Now we're stuck for aggregates larger than 16 bytes,
7770 because OImode vanished in the meantime. Let's not
7771 try to be unduly clever, and simply follow the ABI
7772 for unions in that case. */
7773 if (mode == BLKmode)
7774 return function_arg_union_value (size, mode, 0, regbase);
7775 else
7776 mclass = MODE_INT;
7779 /* We should only have pointer and integer types at this point. This
7780 must match sparc_promote_function_mode. */
7781 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7782 mode = word_mode;
7785 /* We should only have pointer and integer types at this point, except with
7786 -freg-struct-return. This must match sparc_promote_function_mode. */
7787 else if (TARGET_ARCH32
7788 && !(type && AGGREGATE_TYPE_P (type))
7789 && mclass == MODE_INT
7790 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7791 mode = word_mode;
7793 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7794 regno = SPARC_FP_ARG_FIRST;
7795 else
7796 regno = regbase;
7798 return gen_rtx_REG (mode, regno);
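/* Illustrative example (editorial sketch): on a 64-bit target

       struct s { long a; double b; } f (void);

   is a 16-byte RECORD_TYPE, so function_arg_record_value builds a
   PARALLEL that returns 'a' in the first integer register and 'b' in
   an FP register (roughly %f2 for the second 8-byte slot), whereas a
   plain int is simply widened to word_mode per
   sparc_promote_function_mode.  */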
7801 /* Handle TARGET_FUNCTION_VALUE.
7802 On the SPARC, the value is found in the first "output" register, but the
7803 called function leaves it in the first "input" register. */
7805 static rtx
7806 sparc_function_value (const_tree valtype,
7807 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7808 bool outgoing)
7810 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7813 /* Handle TARGET_LIBCALL_VALUE. */
7815 static rtx
7816 sparc_libcall_value (machine_mode mode,
7817 const_rtx fun ATTRIBUTE_UNUSED)
7819 return sparc_function_value_1 (NULL_TREE, mode, false);
7822 /* Handle FUNCTION_VALUE_REGNO_P.
7823 On the SPARC, the first "output" reg is used for integer values, and the
7824 first floating point register is used for floating point values. */
7826 static bool
7827 sparc_function_value_regno_p (const unsigned int regno)
7829 return (regno == 8 || (TARGET_FPU && regno == 32));
7832 /* Do what is necessary for `va_start'. We look at the current function
7833 to determine if stdarg or varargs is used and return the address of
7834 the first unnamed parameter. */
7836 static rtx
7837 sparc_builtin_saveregs (void)
7839 int first_reg = crtl->args.info.words;
7840 rtx address;
7841 int regno;
7843 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7844 emit_move_insn (gen_rtx_MEM (word_mode,
7845 gen_rtx_PLUS (Pmode,
7846 frame_pointer_rtx,
7847 GEN_INT (FIRST_PARM_OFFSET (0)
7848 + (UNITS_PER_WORD
7849 * regno)))),
7850 gen_rtx_REG (word_mode,
7851 SPARC_INCOMING_INT_ARG_FIRST + regno));
7853 address = gen_rtx_PLUS (Pmode,
7854 frame_pointer_rtx,
7855 GEN_INT (FIRST_PARM_OFFSET (0)
7856 + UNITS_PER_WORD * first_reg));
7858 return address;
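/* Illustrative example (editorial sketch, assuming the 32-bit ABI
   where FIRST_PARM_OFFSET is 68): for

       int f (int a, char *fmt, ...);

   crtl->args.info.words is 2 at this point, so the loop stores
   %i2-%i5 into their reserved slots at %fp + 68 + 4 * regno, and the
   returned address points at the slot of %i2, the first unnamed
   argument.  */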
7861 /* Implement `va_start' for stdarg. */
7863 static void
7864 sparc_va_start (tree valist, rtx nextarg)
7866 nextarg = expand_builtin_saveregs ();
7867 std_expand_builtin_va_start (valist, nextarg);
7870 /* Implement `va_arg' for stdarg. */
7872 static tree
7873 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7874 gimple_seq *post_p)
7876 HOST_WIDE_INT size, rsize, align;
7877 tree addr, incr;
7878 bool indirect;
7879 tree ptrtype = build_pointer_type (type);
7881 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7883 indirect = true;
7884 size = rsize = UNITS_PER_WORD;
7885 align = 0;
7887 else
7889 indirect = false;
7890 size = int_size_in_bytes (type);
7891 rsize = ROUND_UP (size, UNITS_PER_WORD);
7892 align = 0;
7894 if (TARGET_ARCH64)
7896 /* For SPARC64, objects requiring 16-byte alignment get it. */
7897 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7898 align = 2 * UNITS_PER_WORD;
7900 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7901 are left-justified in their slots. */
7902 if (AGGREGATE_TYPE_P (type))
7904 if (size == 0)
7905 size = rsize = UNITS_PER_WORD;
7906 else
7907 size = rsize;
7912 incr = valist;
7913 if (align)
7915 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7916 incr = fold_convert (sizetype, incr);
7917 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7918 size_int (-align));
7919 incr = fold_convert (ptr_type_node, incr);
7922 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7923 addr = incr;
7925 if (BYTES_BIG_ENDIAN && size < rsize)
7926 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7928 if (indirect)
7930 addr = fold_convert (build_pointer_type (ptrtype), addr);
7931 addr = build_va_arg_indirect_ref (addr);
7934 /* If the address isn't aligned properly for the type, we need a temporary.
7935 FIXME: This is inefficient; usually we can do this in registers. */
7936 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7938 tree tmp = create_tmp_var (type, "va_arg_tmp");
7939 tree dest_addr = build_fold_addr_expr (tmp);
7940 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7941 3, dest_addr, addr, size_int (rsize));
7942 TREE_ADDRESSABLE (tmp) = 1;
7943 gimplify_and_add (copy, pre_p);
7944 addr = dest_addr;
7947 else
7948 addr = fold_convert (ptrtype, addr);
7950 incr = fold_build_pointer_plus_hwi (incr, rsize);
7951 gimplify_assign (valist, incr, post_p);
7953 return build_va_arg_indirect_ref (addr);
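/* Illustrative gimple-level expansion (editorial sketch, hypothetical
   variable names): for "i = va_arg (ap, int)" on a 64-bit target,
   size is 4 and rsize is 8, so the code above emits roughly

       addr = ap + 4;     big-endian: right-justified in its slot
       ap = ap + 8;
       i = *(int *) addr;

   with the BIT_AND_EXPR sequence first rounding ap up for 16-byte
   aligned types.  */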
7956 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7957 Specify whether the vector mode is supported by the hardware. */
7959 static bool
7960 sparc_vector_mode_supported_p (machine_mode mode)
7962 return TARGET_VIS && VECTOR_MODE_P (mode);
7965 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7967 static machine_mode
7968 sparc_preferred_simd_mode (scalar_mode mode)
7970 if (TARGET_VIS)
7971 switch (mode)
7973 case E_SImode:
7974 return V2SImode;
7975 case E_HImode:
7976 return V4HImode;
7977 case E_QImode:
7978 return V8QImode;
7980 default:;
7983 return word_mode;
7986 /* Return the string to output an unconditional branch to LABEL, which is
7987 the operand number of the label.
7989 DEST is the destination insn (i.e. the label), INSN is the source. */
7991 const char *
7992 output_ubranch (rtx dest, rtx_insn *insn)
7994 static char string[64];
7995 bool v9_form = false;
7996 int delta;
7997 char *p;
7999 /* Even if we are trying to use cbcond for this, evaluate
8000 whether we can use V9 branches as our backup plan. */
8002 delta = 5000000;
8003 if (INSN_ADDRESSES_SET_P ())
8004 delta = (INSN_ADDRESSES (INSN_UID (dest))
8005 - INSN_ADDRESSES (INSN_UID (insn)));
8007 /* Leave some instructions for "slop". */
8008 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8009 v9_form = true;
8011 if (TARGET_CBCOND)
8013 bool emit_nop = emit_cbcond_nop (insn);
8014 bool far = false;
8015 const char *rval;
8017 if (delta < -500 || delta > 500)
8018 far = true;
8020 if (far)
8022 if (v9_form)
8023 rval = "ba,a,pt\t%%xcc, %l0";
8024 else
8025 rval = "b,a\t%l0";
8027 else
8029 if (emit_nop)
8030 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8031 else
8032 rval = "cwbe\t%%g0, %%g0, %l0";
8034 return rval;
8037 if (v9_form)
8038 strcpy (string, "ba%*,pt\t%%xcc, ");
8039 else
8040 strcpy (string, "b%*\t");
8042 p = strchr (string, '\0');
8043 *p++ = '%';
8044 *p++ = 'l';
8045 *p++ = '0';
8046 *p++ = '%';
8047 *p++ = '(';
8048 *p = '\0';
8050 return string;
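/* Illustrative outputs (editorial sketch): within cbcond range the
   function returns

       cwbe	%g0, %g0, .L7

   i.e. an always-true compare-and-branch, while outside cbcond range
   it falls back to an annulled "ba,a,pt %xcc, .L7" (V9 form) or
   "b,a .L7"; the non-cbcond templates use %* and %( for the annul
   flag and delay-slot nop.  */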
8053 /* Return the string to output a conditional branch to LABEL, which is
8054 the operand number of the label. OP is the conditional expression.
8055 XEXP (OP, 0) is assumed to be a condition code register (integer or
8056 floating point) and its mode specifies what kind of comparison we made.
8058 DEST is the destination insn (i.e. the label), INSN is the source.
8060 REVERSED is nonzero if we should reverse the sense of the comparison.
8062 ANNUL is nonzero if we should generate an annulling branch. */
8064 const char *
8065 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8066 rtx_insn *insn)
8068 static char string[64];
8069 enum rtx_code code = GET_CODE (op);
8070 rtx cc_reg = XEXP (op, 0);
8071 machine_mode mode = GET_MODE (cc_reg);
8072 const char *labelno, *branch;
8073 int spaces = 8, far;
8074 char *p;
8076 /* v9 branches are limited to +-1MB. If it is too far away,
8077 change
8079 bne,pt %xcc, .LC30
to
8083 be,pn %xcc, .+12
 nop
8085 ba .LC30
and
8089 fbne,a,pn %fcc2, .LC29
to
8093 fbe,pt %fcc2, .+16
 nop
8095 ba .LC29 */
8097 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8098 if (reversed ^ far)
8100 /* Reversal of FP compares takes care -- an ordered compare
8101 becomes an unordered compare and vice versa. */
8102 if (mode == CCFPmode || mode == CCFPEmode)
8103 code = reverse_condition_maybe_unordered (code);
8104 else
8105 code = reverse_condition (code);
8108 /* Start by writing the branch condition. */
8109 if (mode == CCFPmode || mode == CCFPEmode)
8111 switch (code)
8113 case NE:
8114 branch = "fbne";
8115 break;
8116 case EQ:
8117 branch = "fbe";
8118 break;
8119 case GE:
8120 branch = "fbge";
8121 break;
8122 case GT:
8123 branch = "fbg";
8124 break;
8125 case LE:
8126 branch = "fble";
8127 break;
8128 case LT:
8129 branch = "fbl";
8130 break;
8131 case UNORDERED:
8132 branch = "fbu";
8133 break;
8134 case ORDERED:
8135 branch = "fbo";
8136 break;
8137 case UNGT:
8138 branch = "fbug";
8139 break;
8140 case UNLT:
8141 branch = "fbul";
8142 break;
8143 case UNEQ:
8144 branch = "fbue";
8145 break;
8146 case UNGE:
8147 branch = "fbuge";
8148 break;
8149 case UNLE:
8150 branch = "fbule";
8151 break;
8152 case LTGT:
8153 branch = "fblg";
8154 break;
8155 default:
8156 gcc_unreachable ();
8159 /* ??? !v9: FP branches cannot be preceded by another floating point
8160 insn. Because there is currently no concept of pre-delay slots,
8161 we can fix this only by always emitting a nop before a floating
8162 point branch. */
8164 string[0] = '\0';
8165 if (! TARGET_V9)
8166 strcpy (string, "nop\n\t");
8167 strcat (string, branch);
8169 else
8171 switch (code)
8173 case NE:
8174 if (mode == CCVmode || mode == CCXVmode)
8175 branch = "bvs";
8176 else
8177 branch = "bne";
8178 break;
8179 case EQ:
8180 if (mode == CCVmode || mode == CCXVmode)
8181 branch = "bvc";
8182 else
8183 branch = "be";
8184 break;
8185 case GE:
8186 if (mode == CCNZmode || mode == CCXNZmode)
8187 branch = "bpos";
8188 else
8189 branch = "bge";
8190 break;
8191 case GT:
8192 branch = "bg";
8193 break;
8194 case LE:
8195 branch = "ble";
8196 break;
8197 case LT:
8198 if (mode == CCNZmode || mode == CCXNZmode)
8199 branch = "bneg";
8200 else
8201 branch = "bl";
8202 break;
8203 case GEU:
8204 branch = "bgeu";
8205 break;
8206 case GTU:
8207 branch = "bgu";
8208 break;
8209 case LEU:
8210 branch = "bleu";
8211 break;
8212 case LTU:
8213 branch = "blu";
8214 break;
8215 default:
8216 gcc_unreachable ();
8218 strcpy (string, branch);
8220 spaces -= strlen (branch);
8221 p = strchr (string, '\0');
8223 /* Now add the annulling, the label, and a possible noop. */
8224 if (annul && ! far)
8226 strcpy (p, ",a");
8227 p += 2;
8228 spaces -= 2;
8231 if (TARGET_V9)
8233 rtx note;
8234 int v8 = 0;
8236 if (! far && insn && INSN_ADDRESSES_SET_P ())
8238 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8239 - INSN_ADDRESSES (INSN_UID (insn)));
8240 /* Leave some instructions for "slop". */
8241 if (delta < -260000 || delta >= 260000)
8242 v8 = 1;
8245 switch (mode)
8247 case E_CCmode:
8248 case E_CCNZmode:
8249 case E_CCCmode:
8250 case E_CCVmode:
8251 labelno = "%%icc, ";
8252 if (v8)
8253 labelno = "";
8254 break;
8255 case E_CCXmode:
8256 case E_CCXNZmode:
8257 case E_CCXCmode:
8258 case E_CCXVmode:
8259 labelno = "%%xcc, ";
8260 gcc_assert (!v8);
8261 break;
8262 case E_CCFPmode:
8263 case E_CCFPEmode:
8265 static char v9_fcc_labelno[] = "%%fccX, ";
8266 /* Set the char indicating the number of the fcc reg to use. */
8267 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8268 labelno = v9_fcc_labelno;
8269 if (v8)
8271 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8272 labelno = "";
8275 break;
8276 default:
8277 gcc_unreachable ();
8280 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8282 strcpy (p,
8283 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8284 >= profile_probability::even ()) ^ far)
8285 ? ",pt" : ",pn");
8286 p += 3;
8287 spaces -= 3;
8290 else
8291 labelno = "";
8293 if (spaces > 0)
8294 *p++ = '\t';
8295 else
8296 *p++ = ' ';
8297 strcpy (p, labelno);
8298 p = strchr (p, '\0');
8299 if (far)
8301 strcpy (p, ".+12\n\t nop\n\tb\t");
8302 /* Skip the next insn if requested or
8303 if we know that it will be a nop. */
8304 if (annul || ! final_sequence)
8305 p[3] = '6';
8306 p += 14;
8308 *p++ = '%';
8309 *p++ = 'l';
8310 *p++ = label + '0';
8311 *p++ = '%';
8312 *p++ = '#';
8313 *p = '\0';
8315 return string;
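/* Illustrative outputs (editorial sketch): an EQ test of %icc with a
   predicted-taken branch-probability note comes out as

       be,pt	%icc, .L42

   and, when the target is out of V9 range, the reversed form that
   branches around an unconditional branch:

       bne,pn	%icc, .+12
        nop
       b	.L42  */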
8318 /* Emit a library call comparison between floating point X and Y.
8319 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8320 Return the new operator to be used in the comparison sequence.
8322 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8323 values as arguments instead of the TFmode registers themselves;
8324 that's why we cannot call emit_float_lib_cmp. */
8327 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8329 const char *qpfunc;
8330 rtx slot0, slot1, result, tem, tem2, libfunc;
8331 machine_mode mode;
8332 enum rtx_code new_comparison;
8334 switch (comparison)
8336 case EQ:
8337 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8338 break;
8340 case NE:
8341 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8342 break;
8344 case GT:
8345 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8346 break;
8348 case GE:
8349 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8350 break;
8352 case LT:
8353 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8354 break;
8356 case LE:
8357 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8358 break;
8360 case ORDERED:
8361 case UNORDERED:
8362 case UNGT:
8363 case UNLT:
8364 case UNEQ:
8365 case UNGE:
8366 case UNLE:
8367 case LTGT:
8368 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8369 break;
8371 default:
8372 gcc_unreachable ();
8375 if (TARGET_ARCH64)
8377 if (MEM_P (x))
8379 tree expr = MEM_EXPR (x);
8380 if (expr)
8381 mark_addressable (expr);
8382 slot0 = x;
8384 else
8386 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8387 emit_move_insn (slot0, x);
8390 if (MEM_P (y))
8392 tree expr = MEM_EXPR (y);
8393 if (expr)
8394 mark_addressable (expr);
8395 slot1 = y;
8397 else
8399 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8400 emit_move_insn (slot1, y);
8403 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8404 emit_library_call (libfunc, LCT_NORMAL,
8405 DImode,
8406 XEXP (slot0, 0), Pmode,
8407 XEXP (slot1, 0), Pmode);
8408 mode = DImode;
8410 else
8412 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8413 emit_library_call (libfunc, LCT_NORMAL,
8414 SImode,
8415 x, TFmode, y, TFmode);
8416 mode = SImode;
8420 /* Immediately move the result of the libcall into a pseudo
8421 register so reload doesn't clobber the value if it needs
8422 the return register for a spill reg. */
8423 result = gen_reg_rtx (mode);
8424 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8426 switch (comparison)
8428 default:
8429 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8430 case ORDERED:
8431 case UNORDERED:
8432 new_comparison = (comparison == UNORDERED ? EQ : NE);
8433 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8434 case UNGT:
8435 case UNGE:
8436 new_comparison = (comparison == UNGT ? GT : NE);
8437 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8438 case UNLE:
8439 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8440 case UNLT:
8441 tem = gen_reg_rtx (mode);
8442 if (TARGET_ARCH32)
8443 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8444 else
8445 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8446 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8447 case UNEQ:
8448 case LTGT:
8449 tem = gen_reg_rtx (mode);
8450 if (TARGET_ARCH32)
8451 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8452 else
8453 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8454 tem2 = gen_reg_rtx (mode);
8455 if (TARGET_ARCH32)
8456 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8457 else
8458 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8459 new_comparison = (comparison == UNEQ ? EQ : NE);
8460 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8463 gcc_unreachable ();
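/* The mapping above relies on the return encoding of the _Q_cmp and
   _Qp_cmp comparison routines (0 = equal, 1 = less, 2 = greater,
   3 = unordered).  For example, UNLT tests (result & 1) != 0, which
   catches exactly "less" and "unordered", and UNLE tests
   result != 2.  */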
8466 /* Generate an unsigned DImode to FP conversion. This is the same code
8467 optabs would emit if we didn't have TFmode patterns. */
8469 void
8470 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8472 rtx i0, i1, f0, in, out;
8474 out = operands[0];
8475 in = force_reg (DImode, operands[1]);
8476 rtx_code_label *neglab = gen_label_rtx ();
8477 rtx_code_label *donelab = gen_label_rtx ();
8478 i0 = gen_reg_rtx (DImode);
8479 i1 = gen_reg_rtx (DImode);
8480 f0 = gen_reg_rtx (mode);
8482 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8484 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8485 emit_jump_insn (gen_jump (donelab));
8486 emit_barrier ();
8488 emit_label (neglab);
8490 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8491 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8492 emit_insn (gen_iordi3 (i0, i0, i1));
8493 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8494 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8496 emit_label (donelab);
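/* Illustrative C equivalent of the negative path (editorial sketch,
   not compiled):

       d = (double) ((u >> 1) | (u & 1));   halve, keep sticky low bit
       result = d + d;                      double it back

   so that the final addition rounds the same way a direct conversion
   of the full 64-bit value would.  */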
8499 /* Generate an FP to unsigned DImode conversion. This is the same code
8500 optabs would emit if we didn't have TFmode patterns. */
8502 void
8503 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8505 rtx i0, i1, f0, in, out, limit;
8507 out = operands[0];
8508 in = force_reg (mode, operands[1]);
8509 rtx_code_label *neglab = gen_label_rtx ();
8510 rtx_code_label *donelab = gen_label_rtx ();
8511 i0 = gen_reg_rtx (DImode);
8512 i1 = gen_reg_rtx (DImode);
8513 limit = gen_reg_rtx (mode);
8514 f0 = gen_reg_rtx (mode);
8516 emit_move_insn (limit,
8517 const_double_from_real_value (
8518 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8519 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8521 emit_insn (gen_rtx_SET (out,
8522 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8523 emit_jump_insn (gen_jump (donelab));
8524 emit_barrier ();
8526 emit_label (neglab);
8528 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8529 emit_insn (gen_rtx_SET (i0,
8530 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8531 emit_insn (gen_movdi (i1, const1_rtx));
8532 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8533 emit_insn (gen_xordi3 (out, i0, i1));
8535 emit_label (donelab);
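/* Illustrative C equivalent (editorial sketch, not compiled):

       if (x < 0x1p63)
         result = (long long) x;
       else
         result = (long long) (x - 0x1p63) ^ (1ULL << 63);

   i.e. bias large values below the signed range before converting,
   then restore the high bit with the xor.  */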
8538 /* Return the string to output a compare and branch instruction to DEST.
8539 DEST is the destination insn (i.e. the label), INSN is the source,
8540 and OP is the conditional expression. */
8542 const char *
8543 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8545 machine_mode mode = GET_MODE (XEXP (op, 0));
8546 enum rtx_code code = GET_CODE (op);
8547 const char *cond_str, *tmpl;
8548 int far, emit_nop, len;
8549 static char string[64];
8550 char size_char;
8552 /* Compare and Branch is limited to +-2KB. If it is too far away,
8553 change
8555 cxbne X, Y, .LC30
to
8559 cxbe X, Y, .+16
 nop
8561 ba,pt xcc, .LC30
8562 nop */
8564 len = get_attr_length (insn);
8566 far = len == 4;
8567 emit_nop = len == 2;
8569 if (far)
8570 code = reverse_condition (code);
8572 size_char = ((mode == SImode) ? 'w' : 'x');
8574 switch (code)
8576 case NE:
8577 cond_str = "ne";
8578 break;
8580 case EQ:
8581 cond_str = "e";
8582 break;
8584 case GE:
8585 cond_str = "ge";
8586 break;
8588 case GT:
8589 cond_str = "g";
8590 break;
8592 case LE:
8593 cond_str = "le";
8594 break;
8596 case LT:
8597 cond_str = "l";
8598 break;
8600 case GEU:
8601 cond_str = "cc";
8602 break;
8604 case GTU:
8605 cond_str = "gu";
8606 break;
8608 case LEU:
8609 cond_str = "leu";
8610 break;
8612 case LTU:
8613 cond_str = "cs";
8614 break;
8616 default:
8617 gcc_unreachable ();
8620 if (far)
8622 int veryfar = 1, delta;
8624 if (INSN_ADDRESSES_SET_P ())
8626 delta = (INSN_ADDRESSES (INSN_UID (dest))
8627 - INSN_ADDRESSES (INSN_UID (insn)));
8628 /* Leave some instructions for "slop". */
8629 if (delta >= -260000 && delta < 260000)
8630 veryfar = 0;
8633 if (veryfar)
8634 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8635 else
8636 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8638 else
8640 if (emit_nop)
8641 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8642 else
8643 tmpl = "c%cb%s\t%%1, %%2, %%3";
8646 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8648 return string;
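/* Illustrative outputs (editorial sketch): an in-range SImode EQ test
   becomes

       cwbe	%o0, %o1, .L5

   while the far variant reverses the condition and branches around:

       cwbne	%o0, %o1, .+16
        nop
       ba,pt	%xcc, .L5
        nop  */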
8651 /* Return the string to output a conditional branch to LABEL, testing
8652 register REG. LABEL is the operand number of the label; REG is the
8653 operand number of the reg. OP is the conditional expression. The mode
8654 of REG says what kind of comparison we made.
8656 DEST is the destination insn (i.e. the label), INSN is the source.
8658 REVERSED is nonzero if we should reverse the sense of the comparison.
8660 ANNUL is nonzero if we should generate an annulling branch. */
8662 const char *
8663 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8664 int annul, rtx_insn *insn)
8666 static char string[64];
8667 enum rtx_code code = GET_CODE (op);
8668 machine_mode mode = GET_MODE (XEXP (op, 0));
8669 rtx note;
8670 int far;
8671 char *p;
8673 /* Branches on a register are limited to +-128KB. If it is too far away,
8674 change
8676 brnz,pt %g1, .LC30
to
8680 brz,pn %g1, .+12
 nop
8682 ba,pt %xcc, .LC30
and
8686 brgez,a,pn %o1, .LC29
to
8690 brlz,pt %o1, .+16
 nop
8692 ba,pt %xcc, .LC29 */
8694 far = get_attr_length (insn) >= 3;
8696 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8697 if (reversed ^ far)
8698 code = reverse_condition (code);
8700 /* Only 64-bit versions of these instructions exist. */
8701 gcc_assert (mode == DImode);
8703 /* Start by writing the branch condition. */
8705 switch (code)
8707 case NE:
8708 strcpy (string, "brnz");
8709 break;
8711 case EQ:
8712 strcpy (string, "brz");
8713 break;
8715 case GE:
8716 strcpy (string, "brgez");
8717 break;
8719 case LT:
8720 strcpy (string, "brlz");
8721 break;
8723 case LE:
8724 strcpy (string, "brlez");
8725 break;
8727 case GT:
8728 strcpy (string, "brgz");
8729 break;
8731 default:
8732 gcc_unreachable ();
8735 p = strchr (string, '\0');
8737 /* Now add the annulling, reg, label, and nop. */
8738 if (annul && ! far)
8740 strcpy (p, ",a");
8741 p += 2;
8744 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8746 strcpy (p,
8747 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8748 >= profile_probability::even ()) ^ far)
8749 ? ",pt" : ",pn");
8750 p += 3;
8753 *p = p < string + 8 ? '\t' : ' ';
8754 p++;
8755 *p++ = '%';
8756 *p++ = '0' + reg;
8757 *p++ = ',';
8758 *p++ = ' ';
8759 if (far)
8761 int veryfar = 1, delta;
8763 if (INSN_ADDRESSES_SET_P ())
8765 delta = (INSN_ADDRESSES (INSN_UID (dest))
8766 - INSN_ADDRESSES (INSN_UID (insn)));
8767 /* Leave some instructions for "slop". */
8768 if (delta >= -260000 && delta < 260000)
8769 veryfar = 0;
8772 strcpy (p, ".+12\n\t nop\n\t");
8773 /* Skip the next insn if requested or
8774 if we know that it will be a nop. */
8775 if (annul || ! final_sequence)
8776 p[3] = '6';
8777 p += 12;
8778 if (veryfar)
8780 strcpy (p, "b\t");
8781 p += 2;
8783 else
8785 strcpy (p, "ba,pt\t%%xcc, ");
8786 p += 13;
8789 *p++ = '%';
8790 *p++ = 'l';
8791 *p++ = '0' + label;
8792 *p++ = '%';
8793 *p++ = '#';
8794 *p = '\0';
8796 return string;
8799 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8800 Such instructions cannot be used in the delay slot of a return insn on v9.
8801 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. */
8804 static int
8805 epilogue_renumber (register rtx *where, int test)
8807 register const char *fmt;
8808 register int i;
8809 register enum rtx_code code;
8811 if (*where == 0)
8812 return 0;
8814 code = GET_CODE (*where);
8816 switch (code)
8818 case REG:
8819 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8820 return 1;
8821 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8822 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8823 /* fallthrough */
8824 case SCRATCH:
8825 case CC0:
8826 case PC:
8827 case CONST_INT:
8828 case CONST_WIDE_INT:
8829 case CONST_DOUBLE:
8830 return 0;
8832 /* Do not replace the frame pointer with the stack pointer because
8833 it can cause the delayed instruction to load below the stack.
8834 This occurs when instructions like:
8836 (set (reg/i:SI 24 %i0)
8837 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8838 (const_int -20 [0xffffffec])) 0))
8840 are in the return delayed slot. */
8841 case PLUS:
8842 if (GET_CODE (XEXP (*where, 0)) == REG
8843 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8844 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8845 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8846 return 1;
8847 break;
8849 case MEM:
8850 if (SPARC_STACK_BIAS
8851 && GET_CODE (XEXP (*where, 0)) == REG
8852 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8853 return 1;
8854 break;
8856 default:
8857 break;
8860 fmt = GET_RTX_FORMAT (code);
8862 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8864 if (fmt[i] == 'E')
8866 register int j;
8867 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8868 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8869 return 1;
8871 else if (fmt[i] == 'e'
8872 && epilogue_renumber (&(XEXP (*where, i)), test))
8873 return 1;
8875 return 0;
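/* Illustrative example (editorial sketch): for the return-value copy

       (set (reg:SI %i0) (const_int 5))

   no %l or %o register is referenced, so epilogue_renumber (..., 1)
   returns 0, and the renaming pass rewrites %i0 (regno 24) into %o0
   (regno 8) to account for the register window shift performed by
   the restore.  */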
8878 /* Leaf functions and non-leaf functions have different needs. */
8880 static const int
8881 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8883 static const int
8884 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8886 static const int *const reg_alloc_orders[] = {
8887 reg_leaf_alloc_order,
8888 reg_nonleaf_alloc_order};
8890 void
8891 order_regs_for_local_alloc (void)
8893 static int last_order_nonleaf = 1;
8895 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8897 last_order_nonleaf = !last_order_nonleaf;
8898 memcpy ((char *) reg_alloc_order,
8899 (const char *) reg_alloc_orders[last_order_nonleaf],
8900 FIRST_PSEUDO_REGISTER * sizeof (int));
8904 /* Return 1 if REG and MEM are legitimate enough to allow the various
8905 MEM<-->REG splits to be run. */
8908 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8910 /* Punt if we are here by mistake. */
8911 gcc_assert (reload_completed);
8913 /* We must have an offsettable memory reference. */
8914 if (!offsettable_memref_p (mem))
8915 return 0;
8917 /* If we have legitimate args for ldd/std, we do not want
8918 the split to happen. */
8919 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8920 return 0;
8922 /* Success. */
8923 return 1;
8926 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8928 void
8929 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8931 rtx high_part = gen_highpart (mode, dest);
8932 rtx low_part = gen_lowpart (mode, dest);
8933 rtx word0 = adjust_address (src, mode, 0);
8934 rtx word1 = adjust_address (src, mode, 4);
8936 if (reg_overlap_mentioned_p (high_part, word1))
8938 emit_move_insn_1 (low_part, word1);
8939 emit_move_insn_1 (high_part, word0);
8941 else
8943 emit_move_insn_1 (high_part, word0);
8944 emit_move_insn_1 (low_part, word1);
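/* Illustrative example (editorial sketch): splitting the DImode load

       (set (reg:DI %o0) (mem:DI [%l0]))

   in SImode produces "ld [%l0], %o0" followed by "ld [%l0+4], %o1";
   the order is flipped when the high part also appears in the
   address, so the address register is not clobbered early.  */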
8948 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8950 void
8951 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8953 rtx word0 = adjust_address (dest, mode, 0);
8954 rtx word1 = adjust_address (dest, mode, 4);
8955 rtx high_part = gen_highpart (mode, src);
8956 rtx low_part = gen_lowpart (mode, src);
8958 emit_move_insn_1 (word0, high_part);
8959 emit_move_insn_1 (word1, low_part);
8962 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8965 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8967 /* Punt if we are here by mistake. */
8968 gcc_assert (reload_completed);
8970 if (GET_CODE (reg1) == SUBREG)
8971 reg1 = SUBREG_REG (reg1);
8972 if (GET_CODE (reg1) != REG)
8973 return 0;
8974 const int regno1 = REGNO (reg1);
8976 if (GET_CODE (reg2) == SUBREG)
8977 reg2 = SUBREG_REG (reg2);
8978 if (GET_CODE (reg2) != REG)
8979 return 0;
8980 const int regno2 = REGNO (reg2);
8982 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8983 return 1;
8985 if (TARGET_VIS3)
8987 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8988 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8989 return 1;
8992 return 0;
8995 /* Split a REG <--> REG move into a pair of moves in MODE. */
8997 void
8998 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9000 rtx dest1 = gen_highpart (mode, dest);
9001 rtx dest2 = gen_lowpart (mode, dest);
9002 rtx src1 = gen_highpart (mode, src);
9003 rtx src2 = gen_lowpart (mode, src);
9005 /* Now emit using the real source and destination we found, swapping
9006 the order if we detect overlap. */
9007 if (reg_overlap_mentioned_p (dest1, src2))
9009 emit_move_insn_1 (dest2, src2);
9010 emit_move_insn_1 (dest1, src1);
9012 else
9014 emit_move_insn_1 (dest1, src1);
9015 emit_move_insn_1 (dest2, src2);
9019 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9020 This makes them candidates for using ldd and std insns.
9022 Note reg1 and reg2 *must* be hard registers. */
9025 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9027 /* We might have been passed a SUBREG. */
9028 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9029 return 0;
9031 if (REGNO (reg1) % 2 != 0)
9032 return 0;
9034 /* Integer ldd is deprecated in SPARC V9. */
9035 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9036 return 0;
9038 return (REGNO (reg1) == REGNO (reg2) - 1);
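/* Illustrative examples (editorial sketch): %o0/%o1 qualify (even
   first regno, consecutive), %o1/%o2 do not, and on V9 only FP pairs
   such as %f2/%f3 remain candidates because integer ldd is
   deprecated there.  */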
9041 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9042 an ldd or std insn.
9044 This can only happen when addr1 and addr2, the addresses in mem1
9045 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9046 addr1 must also be aligned on a 64-bit boundary.
9048 Also, if dependent_reg_rtx is not null, it must not be used to
9049 compute the address for mem1, i.e. we cannot optimize a sequence
9050 like:
9051 ld [%o0], %o0
9052 ld [%o0 + 4], %o1
to
9054 ldd [%o0], %o0
9055 nor:
9056 ld [%g3 + 4], %g3
9057 ld [%g3], %g2
to
9059 ldd [%g3], %g2
9061 But, note that the transformation from:
9062 ld [%g2 + 4], %g3
9063 ld [%g2], %g2
to
9065 ldd [%g2], %g2
9066 is perfectly fine. Thus, the peephole2 patterns always pass us
9067 the destination register of the first load, never the second one.
9069 For stores we don't have a similar problem, so dependent_reg_rtx is
9070 NULL_RTX. */
9073 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9075 rtx addr1, addr2;
9076 unsigned int reg1;
9077 HOST_WIDE_INT offset1;
9079 /* The mems cannot be volatile. */
9080 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9081 return 0;
9083 /* MEM1 should be aligned on a 64-bit boundary. */
9084 if (MEM_ALIGN (mem1) < 64)
9085 return 0;
9087 addr1 = XEXP (mem1, 0);
9088 addr2 = XEXP (mem2, 0);
9090 /* Extract a register number and offset (if used) from the first addr. */
9091 if (GET_CODE (addr1) == PLUS)
9093 /* If not a REG, return zero. */
9094 if (GET_CODE (XEXP (addr1, 0)) != REG)
9095 return 0;
9096 else
9098 reg1 = REGNO (XEXP (addr1, 0));
9099 /* The offset must be constant! */
9100 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9101 return 0;
9102 offset1 = INTVAL (XEXP (addr1, 1));
9105 else if (GET_CODE (addr1) != REG)
9106 return 0;
9107 else
9109 reg1 = REGNO (addr1);
9110 /* This was a simple (mem (reg)) expression. Offset is 0. */
9111 offset1 = 0;
9114 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9115 if (GET_CODE (addr2) != PLUS)
9116 return 0;
9118 if (GET_CODE (XEXP (addr2, 0)) != REG
9119 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9120 return 0;
9122 if (reg1 != REGNO (XEXP (addr2, 0)))
9123 return 0;
9125 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9126 return 0;
9128 /* The first offset must be evenly divisible by 8 to ensure the
9129 address is 64-bit aligned. */
9130 if (offset1 % 8 != 0)
9131 return 0;
9133 /* The offset for the second addr must be 4 more than the first addr. */
9134 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9135 return 0;
9137 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9138 instructions. */
9139 return 1;
9142 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9145 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9147 rtx x = widen_memory_access (mem1, mode, 0);
9148 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9149 return x;
9152 /* Return 1 if reg is a pseudo, or is the first register in
9153 a hard register pair. This makes it suitable for use in
9154 ldd and std insns. */
9157 register_ok_for_ldd (rtx reg)
9159 /* We might have been passed a SUBREG. */
9160 if (!REG_P (reg))
9161 return 0;
9163 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9164 return (REGNO (reg) % 2 == 0);
9166 return 1;
9169 /* Return 1 if OP, a MEM, has an address which is known to be
9170 aligned to an 8-byte boundary. */
9173 memory_ok_for_ldd (rtx op)
9175 /* In 64-bit mode, we assume that the address is word-aligned. */
9176 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9177 return 0;
9179 if (! can_create_pseudo_p ()
9180 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9181 return 0;
9183 return 1;
9186 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9188 static bool
9189 sparc_print_operand_punct_valid_p (unsigned char code)
9191 if (code == '#'
9192 || code == '*'
9193 || code == '('
9194 || code == ')'
9195 || code == '_'
9196 || code == '&')
9197 return true;
9199 return false;
9202 /* Implement TARGET_PRINT_OPERAND.
9203 Print operand X (an rtx) in assembler syntax to file FILE.
9204 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9205 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9207 static void
9208 sparc_print_operand (FILE *file, rtx x, int code)
9210 const char *s;
9212 switch (code)
9214 case '#':
9215 /* Output an insn in a delay slot. */
9216 if (final_sequence)
9217 sparc_indent_opcode = 1;
9218 else
9219 fputs ("\n\t nop", file);
9220 return;
9221 case '*':
9222 /* Output an annul flag if there's nothing for the delay slot and we
9223 are optimizing. This is always used with '(' below.
9224 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9225 this is a dbx bug. So, we only do this when optimizing.
9226 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9227 Always emit a nop in case the next instruction is a branch. */
9228 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9229 fputs (",a", file);
9230 return;
9231 case '(':
9232 /* Output a 'nop' if there's nothing for the delay slot and we are
9233 not optimizing. This is always used with '*' above. */
9234 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9235 fputs ("\n\t nop", file);
9236 else if (final_sequence)
9237 sparc_indent_opcode = 1;
9238 return;
9239 case ')':
9240 /* Output the right displacement from the saved PC on function return.
9241 The caller may have placed an "unimp" insn immediately after the call
9242 so we have to account for it. This insn is used in the 32-bit ABI
9243 when calling a function that returns a non-zero-sized structure. The
9244 64-bit ABI doesn't have it. Be careful to have this test be the same
9245 as that for the call. The exception is when sparc_std_struct_return
9246 is enabled, the psABI is followed exactly and the adjustment is made
9247 by the code in sparc_struct_value_rtx. The call emitted is the same
9248 when sparc_std_struct_return is enabled. */
9249 if (!TARGET_ARCH64
9250 && cfun->returns_struct
9251 && !sparc_std_struct_return
9252 && DECL_SIZE (DECL_RESULT (current_function_decl))
9253 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9254 == INTEGER_CST
9255 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9256 fputs ("12", file);
9257 else
9258 fputc ('8', file);
9259 return;
9260 case '_':
9261 /* Output the Embedded Medium/Anywhere code model base register. */
9262 fputs (EMBMEDANY_BASE_REG, file);
9263 return;
9264 case '&':
9265 /* Print some local dynamic TLS name. */
9266 if (const char *name = get_some_local_dynamic_name ())
9267 assemble_name (file, name);
9268 else
9269 output_operand_lossage ("'%%&' used without any "
9270 "local dynamic TLS references");
9271 return;
9273 case 'Y':
9274 /* Adjust the operand to take into account a RESTORE operation. */
9275 if (GET_CODE (x) == CONST_INT)
9276 break;
9277 else if (GET_CODE (x) != REG)
9278 output_operand_lossage ("invalid %%Y operand");
9279 else if (REGNO (x) < 8)
9280 fputs (reg_names[REGNO (x)], file);
9281 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9282 fputs (reg_names[REGNO (x)-16], file);
9283 else
9284 output_operand_lossage ("invalid %%Y operand");
9285 return;
9286 case 'L':
9287 /* Print out the low order register name of a register pair. */
9288 if (WORDS_BIG_ENDIAN)
9289 fputs (reg_names[REGNO (x)+1], file);
9290 else
9291 fputs (reg_names[REGNO (x)], file);
9292 return;
9293 case 'H':
9294 /* Print out the high order register name of a register pair. */
9295 if (WORDS_BIG_ENDIAN)
9296 fputs (reg_names[REGNO (x)], file);
9297 else
9298 fputs (reg_names[REGNO (x)+1], file);
9299 return;
9300 case 'R':
9301 /* Print out the second register name of a register pair or quad.
9302 I.e., R (%o0) => %o1. */
9303 fputs (reg_names[REGNO (x)+1], file);
9304 return;
9305 case 'S':
9306 /* Print out the third register name of a register quad.
9307 I.e., S (%o0) => %o2. */
9308 fputs (reg_names[REGNO (x)+2], file);
9309 return;
9310 case 'T':
9311 /* Print out the fourth register name of a register quad.
9312 I.e., T (%o0) => %o3. */
9313 fputs (reg_names[REGNO (x)+3], file);
9314 return;
9315 case 'x':
9316 /* Print a condition code register. */
9317 if (REGNO (x) == SPARC_ICC_REG)
9319 switch (GET_MODE (x))
9321 case E_CCmode:
9322 case E_CCNZmode:
9323 case E_CCCmode:
9324 case E_CCVmode:
9325 s = "%icc";
9326 break;
9327 case E_CCXmode:
9328 case E_CCXNZmode:
9329 case E_CCXCmode:
9330 case E_CCXVmode:
9331 s = "%xcc";
9332 break;
9333 default:
9334 gcc_unreachable ();
9336 fputs (s, file);
9338 else
9339 /* %fccN register */
9340 fputs (reg_names[REGNO (x)], file);
9341 return;
9342 case 'm':
9343 /* Print the operand's address only. */
9344 output_address (GET_MODE (x), XEXP (x, 0));
9345 return;
9346 case 'r':
9347 /* In this case we need a register. Use %g0 if the
9348 operand is const0_rtx. */
9349 if (x == const0_rtx
9350 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9352 fputs ("%g0", file);
9353 return;
9355 else
9356 break;
9358 case 'A':
9359 switch (GET_CODE (x))
9361 case IOR:
9362 s = "or";
9363 break;
9364 case AND:
9365 s = "and";
9366 break;
9367 case XOR:
9368 s = "xor";
9369 break;
9370 default:
9371 output_operand_lossage ("invalid %%A operand");
9372 s = "";
9373 break;
9375 fputs (s, file);
9376 return;
9378 case 'B':
9379 switch (GET_CODE (x))
9381 case IOR:
9382 s = "orn";
9383 break;
9384 case AND:
9385 s = "andn";
9386 break;
9387 case XOR:
9388 s = "xnor";
9389 break;
9390 default:
9391 output_operand_lossage ("invalid %%B operand");
9392 s = "";
9393 break;
9395 fputs (s, file);
9396 return;
9398 /* This is used by the conditional move instructions. */
9399 case 'C':
9401 machine_mode mode = GET_MODE (XEXP (x, 0));
9402 switch (GET_CODE (x))
9404 case NE:
9405 if (mode == CCVmode || mode == CCXVmode)
9406 s = "vs";
9407 else
9408 s = "ne";
9409 break;
9410 case EQ:
9411 if (mode == CCVmode || mode == CCXVmode)
9412 s = "vc";
9413 else
9414 s = "e";
9415 break;
9416 case GE:
9417 if (mode == CCNZmode || mode == CCXNZmode)
9418 s = "pos";
9419 else
9420 s = "ge";
9421 break;
9422 case GT:
9423 s = "g";
9424 break;
9425 case LE:
9426 s = "le";
9427 break;
9428 case LT:
9429 if (mode == CCNZmode || mode == CCXNZmode)
9430 s = "neg";
9431 else
9432 s = "l";
9433 break;
9434 case GEU:
9435 s = "geu";
9436 break;
9437 case GTU:
9438 s = "gu";
9439 break;
9440 case LEU:
9441 s = "leu";
9442 break;
9443 case LTU:
9444 s = "lu";
9445 break;
9446 case LTGT:
9447 s = "lg";
9448 break;
9449 case UNORDERED:
9450 s = "u";
9451 break;
9452 case ORDERED:
9453 s = "o";
9454 break;
9455 case UNLT:
9456 s = "ul";
9457 break;
9458 case UNLE:
9459 s = "ule";
9460 break;
9461 case UNGT:
9462 s = "ug";
9463 break;
9464 case UNGE:
9465 s = "uge"
9466 ; break;
9467 case UNEQ:
9468 s = "ue";
9469 break;
9470 default:
9471 output_operand_lossage ("invalid %%C operand");
9472 s = "";
9473 break;
9475 fputs (s, file);
9476 return;
9479 /* These are used by the movr instruction pattern. */
9480 case 'D':
9482 switch (GET_CODE (x))
9484 case NE:
9485 s = "ne";
9486 break;
9487 case EQ:
9488 s = "e";
9489 break;
9490 case GE:
9491 s = "gez";
9492 break;
9493 case LT:
9494 s = "lz";
9495 break;
9496 case LE:
9497 s = "lez";
9498 break;
9499 case GT:
9500 s = "gz";
9501 break;
9502 default:
9503 output_operand_lossage ("invalid %%D operand");
9504 s = "";
9505 break;
9507 fputs (s, file);
9508 return;
9511 case 'b':
9513 /* Print a sign-extended character. */
9514 int i = trunc_int_for_mode (INTVAL (x), QImode);
9515 fprintf (file, "%d", i);
9516 return;
9519 case 'f':
9520 /* Operand must be a MEM; write its address. */
9521 if (GET_CODE (x) != MEM)
9522 output_operand_lossage ("invalid %%f operand");
9523 output_address (GET_MODE (x), XEXP (x, 0));
9524 return;
9526 case 's':
9528 /* Print a sign-extended 32-bit value. */
9529 HOST_WIDE_INT i;
9530 if (GET_CODE(x) == CONST_INT)
9531 i = INTVAL (x);
9532 else
9534 output_operand_lossage ("invalid %%s operand");
9535 return;
9537 i = trunc_int_for_mode (i, SImode);
9538 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9539 return;
9542 case 0:
9543 /* Do nothing special. */
9544 break;
9546 default:
9547 /* Undocumented flag. */
9548 output_operand_lossage ("invalid operand output code");
9551 if (GET_CODE (x) == REG)
9552 fputs (reg_names[REGNO (x)], file);
9553 else if (GET_CODE (x) == MEM)
9555 fputc ('[', file);
9556 /* Poor Sun assembler doesn't understand absolute addressing. */
9557 if (CONSTANT_P (XEXP (x, 0)))
9558 fputs ("%g0+", file);
9559 output_address (GET_MODE (x), XEXP (x, 0));
9560 fputc (']', file);
9562 else if (GET_CODE (x) == HIGH)
9564 fputs ("%hi(", file);
9565 output_addr_const (file, XEXP (x, 0));
9566 fputc (')', file);
9568 else if (GET_CODE (x) == LO_SUM)
9570 sparc_print_operand (file, XEXP (x, 0), 0);
9571 if (TARGET_CM_MEDMID)
9572 fputs ("+%l44(", file);
9573 else
9574 fputs ("+%lo(", file);
9575 output_addr_const (file, XEXP (x, 1));
9576 fputc (')', file);
9578 else if (GET_CODE (x) == CONST_DOUBLE)
9579 output_operand_lossage ("floating-point constant not a valid immediate operand");
9580 else
9581 output_addr_const (file, x);
9584 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9586 static void
9587 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9589 register rtx base, index = 0;
9590 int offset = 0;
9591 register rtx addr = x;
9593 if (REG_P (addr))
9594 fputs (reg_names[REGNO (addr)], file);
9595 else if (GET_CODE (addr) == PLUS)
9597 if (CONST_INT_P (XEXP (addr, 0)))
9598 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9599 else if (CONST_INT_P (XEXP (addr, 1)))
9600 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9601 else
9602 base = XEXP (addr, 0), index = XEXP (addr, 1);
9603 if (GET_CODE (base) == LO_SUM)
9605 gcc_assert (USE_AS_OFFSETABLE_LO10
9606 && TARGET_ARCH64
9607 && ! TARGET_CM_MEDMID);
9608 output_operand (XEXP (base, 0), 0);
9609 fputs ("+%lo(", file);
9610 output_address (VOIDmode, XEXP (base, 1));
9611 fprintf (file, ")+%d", offset);
9613 else
9615 fputs (reg_names[REGNO (base)], file);
9616 if (index == 0)
9617 fprintf (file, "%+d", offset);
9618 else if (REG_P (index))
9619 fprintf (file, "+%s", reg_names[REGNO (index)]);
9620 else if (GET_CODE (index) == SYMBOL_REF
9621 || GET_CODE (index) == LABEL_REF
9622 || GET_CODE (index) == CONST)
9623 fputc ('+', file), output_addr_const (file, index);
9624 else gcc_unreachable ();
9627 else if (GET_CODE (addr) == MINUS
9628 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9630 output_addr_const (file, XEXP (addr, 0));
9631 fputs ("-(", file);
9632 output_addr_const (file, XEXP (addr, 1));
9633 fputs ("-.)", file);
9635 else if (GET_CODE (addr) == LO_SUM)
9637 output_operand (XEXP (addr, 0), 0);
9638 if (TARGET_CM_MEDMID)
9639 fputs ("+%l44(", file);
9640 else
9641 fputs ("+%lo(", file);
9642 output_address (VOIDmode, XEXP (addr, 1));
9643 fputc (')', file);
9645 else if (flag_pic
9646 && GET_CODE (addr) == CONST
9647 && GET_CODE (XEXP (addr, 0)) == MINUS
9648 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9649 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9650 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9652 addr = XEXP (addr, 0);
9653 output_addr_const (file, XEXP (addr, 0));
9654 /* Group the args of the second CONST in parenthesis. */
9655 fputs ("-(", file);
9656 /* Skip past the second CONST--it does nothing for us. */
9657 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9658 /* Close the parenthesis. */
9659 fputc (')', file);
9661 else
9663 output_addr_const (file, addr);
9667 /* Target hook for assembling integer objects. The sparc version has
9668 special handling for aligned DI-mode objects. */
9670 static bool
9671 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9673 /* ??? We only output .xword's for symbols and only then in environments
9674 where the assembler can handle them. */
9675 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9677 if (TARGET_V9)
9679 assemble_integer_with_op ("\t.xword\t", x);
9680 return true;
9682 else
9684 assemble_aligned_integer (4, const0_rtx);
9685 assemble_aligned_integer (4, x);
9686 return true;
9689 return default_assemble_integer (x, size, aligned_p);
9692 /* Return the value of a code used in the .proc pseudo-op that says
9693 what kind of result this function returns. For non-C types, we pick
9694 the closest C type. */
9696 #ifndef SHORT_TYPE_SIZE
9697 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9698 #endif
9700 #ifndef INT_TYPE_SIZE
9701 #define INT_TYPE_SIZE BITS_PER_WORD
9702 #endif
9704 #ifndef LONG_TYPE_SIZE
9705 #define LONG_TYPE_SIZE BITS_PER_WORD
9706 #endif
9708 #ifndef LONG_LONG_TYPE_SIZE
9709 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9710 #endif
9712 #ifndef FLOAT_TYPE_SIZE
9713 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9714 #endif
9716 #ifndef DOUBLE_TYPE_SIZE
9717 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9718 #endif
9720 #ifndef LONG_DOUBLE_TYPE_SIZE
9721 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9722 #endif
9724 unsigned long
9725 sparc_type_code (register tree type)
9727 register unsigned long qualifiers = 0;
9728 register unsigned shift;
9730 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9731 setting more, since some assemblers will give an error for this. Also,
9732 we must be careful to avoid shifts of 32 bits or more to avoid getting
9733 unpredictable results. */
9735 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9737 switch (TREE_CODE (type))
9739 case ERROR_MARK:
9740 return qualifiers;
9742 case ARRAY_TYPE:
9743 qualifiers |= (3 << shift);
9744 break;
9746 case FUNCTION_TYPE:
9747 case METHOD_TYPE:
9748 qualifiers |= (2 << shift);
9749 break;
9751 case POINTER_TYPE:
9752 case REFERENCE_TYPE:
9753 case OFFSET_TYPE:
9754 qualifiers |= (1 << shift);
9755 break;
9757 case RECORD_TYPE:
9758 return (qualifiers | 8);
9760 case UNION_TYPE:
9761 case QUAL_UNION_TYPE:
9762 return (qualifiers | 9);
9764 case ENUMERAL_TYPE:
9765 return (qualifiers | 10);
9767 case VOID_TYPE:
9768 return (qualifiers | 16);
9770 case INTEGER_TYPE:
9771 /* If this is a range type, consider it to be the underlying
9772 type. */
9773 if (TREE_TYPE (type) != 0)
9774 break;
9776 /* Carefully distinguish all the standard types of C,
9777 without messing up if the language is not C. We do this by
9778 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9779 look at both the names and the above fields, but that's redundant.
9780 Any type whose size is between two C types will be considered
9781 to be the wider of the two types. Also, we do not have a
9782 special code to use for "long long", so anything wider than
9783 long is treated the same. Note that we can't distinguish
9784 between "int" and "long" in this code if they are the same
9785 size, but that's fine, since neither can the assembler. */
9787 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9788 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9790 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9791 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9793 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9794 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9796 else
9797 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9799 case REAL_TYPE:
9800 /* If this is a range type, consider it to be the underlying
9801 type. */
9802 if (TREE_TYPE (type) != 0)
9803 break;
9805 /* Carefully distinguish all the standard types of C,
9806 without messing up if the language is not C. */
9808 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9809 return (qualifiers | 6);
9811 else
9812 return (qualifiers | 7);
9814 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9815 /* ??? We need to distinguish between double and float complex types,
9816 but I don't know how yet because I can't reach this code from
9817 existing front-ends. */
9818 return (qualifiers | 7); /* Who knows? */
9820 case VECTOR_TYPE:
9821 case BOOLEAN_TYPE: /* Boolean truth value type. */
9822 case LANG_TYPE:
9823 case NULLPTR_TYPE:
9824 return qualifiers;
9826 default:
9827 gcc_unreachable (); /* Not a type! */
9831 return qualifiers;
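/* Illustrative example (editorial sketch): for "unsigned short *" the
   loop records the pointer code (1 << 6), then returns
   qualifiers | 13 for the unsigned short pointed-to type, giving
   0x4d in total.  */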
9834 /* Nested function support. */
9836 /* Emit RTL insns to initialize the variable parts of a trampoline.
9837 FNADDR is an RTX for the address of the function's pure code.
9838 CXT is an RTX for the static chain value for the function.
9840 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9841 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9842 (to store insns). This is a bit excessive. Perhaps a different
9843 mechanism would be better here.
9845 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9847 static void
9848 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9850 /* SPARC 32-bit trampoline:
9852 sethi %hi(fn), %g1
9853 sethi %hi(static), %g2
9854 jmp %g1+%lo(fn)
9855 or %g2, %lo(static), %g2
9857 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9858 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9861 emit_move_insn
9862 (adjust_address (m_tramp, SImode, 0),
9863 expand_binop (SImode, ior_optab,
9864 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9865 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9866 NULL_RTX, 1, OPTAB_DIRECT));
9868 emit_move_insn
9869 (adjust_address (m_tramp, SImode, 4),
9870 expand_binop (SImode, ior_optab,
9871 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9872 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9873 NULL_RTX, 1, OPTAB_DIRECT));
9875 emit_move_insn
9876 (adjust_address (m_tramp, SImode, 8),
9877 expand_binop (SImode, ior_optab,
9878 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9879 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9880 NULL_RTX, 1, OPTAB_DIRECT));
9882 emit_move_insn
9883 (adjust_address (m_tramp, SImode, 12),
9884 expand_binop (SImode, ior_optab,
9885 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9886 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9887 NULL_RTX, 1, OPTAB_DIRECT));
9889 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9890 aligned on a 16 byte boundary so one flush clears it all. */
9891 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9892 if (sparc_cpu != PROCESSOR_ULTRASPARC
9893 && sparc_cpu != PROCESSOR_ULTRASPARC3
9894 && sparc_cpu != PROCESSOR_NIAGARA
9895 && sparc_cpu != PROCESSOR_NIAGARA2
9896 && sparc_cpu != PROCESSOR_NIAGARA3
9897 && sparc_cpu != PROCESSOR_NIAGARA4
9898 && sparc_cpu != PROCESSOR_NIAGARA7
9899 && sparc_cpu != PROCESSOR_M8)
9900 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9902 /* Call __enable_execute_stack after writing onto the stack to make sure
9903 the stack address is accessible. */
9904 #ifdef HAVE_ENABLE_EXECUTE_STACK
9905 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9906 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9907 #endif
9911 /* The 64-bit version is simpler because it makes more sense to load the
9912 values as "immediate" data out of the trampoline. It's also easier since
9913 we can read the PC without clobbering a register. */
9915 static void
9916 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9918 /* SPARC 64-bit trampoline:
9920 rd %pc, %g1
9921 ldx [%g1+24], %g5
9922 jmp %g5
9923 ldx [%g1+16], %g5
9924 +16 bytes data
9925 */
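/* Annotation (ours): the four constants stored below decode to exactly
   the insns sketched above: 0x83414000 is "rd %pc, %g1", 0xca586018 is
   "ldx [%g1+24], %g5", 0x81c14000 is "jmp %g5", and 0xca586010 is the
   delay-slot "ldx [%g1+16], %g5".  The static chain and the function
   address are the two data words stored at offsets 16 and 24.  */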
9927 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9928 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9929 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9930 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9931 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9932 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9933 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9934 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9935 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9936 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9937 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9939 if (sparc_cpu != PROCESSOR_ULTRASPARC
9940 && sparc_cpu != PROCESSOR_ULTRASPARC3
9941 && sparc_cpu != PROCESSOR_NIAGARA
9942 && sparc_cpu != PROCESSOR_NIAGARA2
9943 && sparc_cpu != PROCESSOR_NIAGARA3
9944 && sparc_cpu != PROCESSOR_NIAGARA4
9945 && sparc_cpu != PROCESSOR_NIAGARA7
9946 && sparc_cpu != PROCESSOR_M8)
9947 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9949 /* Call __enable_execute_stack after writing onto the stack to make sure
9950 the stack address is accessible. */
9951 #ifdef HAVE_ENABLE_EXECUTE_STACK
9952 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9953 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9954 #endif
9957 /* Worker for TARGET_TRAMPOLINE_INIT. */
9959 static void
9960 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9962 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9963 cxt = force_reg (Pmode, cxt);
9964 if (TARGET_ARCH64)
9965 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9966 else
9967 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9970 /* Adjust the cost of a scheduling dependency. Return the new cost of
9971 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9973 static int
9974 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9975 int cost)
9977 enum attr_type insn_type;
9979 if (recog_memoized (insn) < 0)
9980 return cost;
9982 insn_type = get_attr_type (insn);
9984 if (dep_type == 0)
9986 /* Data dependency; DEP_INSN writes a register that INSN reads some
9987 cycles later. */
9989 /* if a load, then the dependence must be on the memory address;
9990 add an extra "cycle". Note that the cost could be two cycles
9991 if the reg was written late in an instruction group; we cannot tell
9992 here. */
9993 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9994 return cost + 3;
9996 /* Get the delay only if the address of the store is the dependence. */
9997 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9999 rtx pat = PATTERN (insn);
10000 rtx dep_pat = PATTERN (dep_insn);
10002 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10003 return cost; /* This should not happen! */
10005 /* The dependency between the two instructions was on the data that
10006 is being stored. Assume that this implies that the address of the
10007 store is not dependent. */
10008 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10009 return cost;
10011 return cost + 3; /* An approximation. */
10014 /* A shift instruction cannot receive its data from an instruction
10015 in the same cycle; add a one cycle penalty. */
10016 if (insn_type == TYPE_SHIFT)
10017 return cost + 3; /* Split before cascade into shift. */
10019 else
10021 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10022 INSN writes some cycles later. */
10024 /* These are only significant for the fpu unit; writing a fp reg before
10025 the fpu has finished with it stalls the processor. */
10027 /* Reusing an integer register causes no problems. */
10028 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10029 return 0;
10032 return cost;
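/* Illustration (ours): under the model above, a load whose address
   register is produced by the preceding insn sees its nominal cost
   raised by 3, a store whose only dependence is on the data being
   stored keeps the original cost, and anti/output dependencies on
   integer results are free.  */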
10035 static int
10036 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10037 int cost)
10039 enum attr_type insn_type, dep_type;
10040 rtx pat = PATTERN (insn);
10041 rtx dep_pat = PATTERN (dep_insn);
10043 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10044 return cost;
10046 insn_type = get_attr_type (insn);
10047 dep_type = get_attr_type (dep_insn);
10049 switch (dtype)
10051 case 0:
10052 /* Data dependency; DEP_INSN writes a register that INSN reads some
10053 cycles later. */
10055 switch (insn_type)
10057 case TYPE_STORE:
10058 case TYPE_FPSTORE:
10059 /* Get the delay iff the address of the store is the dependence. */
10060 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10061 return cost;
10063 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10064 return cost;
10065 return cost + 3;
10067 case TYPE_LOAD:
10068 case TYPE_SLOAD:
10069 case TYPE_FPLOAD:
10070 /* If a load, then the dependence must be on the memory address. If
10071 the addresses aren't equal, then it might be a false dependency */
10072 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10074 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10075 || GET_CODE (SET_DEST (dep_pat)) != MEM
10076 || GET_CODE (SET_SRC (pat)) != MEM
10077 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10078 XEXP (SET_SRC (pat), 0)))
10079 return cost + 2;
10081 return cost + 8;
10083 break;
10085 case TYPE_BRANCH:
10086 /* Compare to branch latency is 0. There is no benefit from
10087 separating compare and branch. */
10088 if (dep_type == TYPE_COMPARE)
10089 return 0;
10090 /* Floating point compare to branch latency is less than
10091 compare to conditional move. */
10092 if (dep_type == TYPE_FPCMP)
10093 return cost - 1;
10094 break;
10095 default:
10096 break;
10098 break;
10100 case REG_DEP_ANTI:
10101 /* Anti-dependencies only penalize the fpu unit. */
10102 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10103 return 0;
10104 break;
10106 default:
10107 break;
10110 return cost;
10113 static int
10114 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10115 unsigned int)
10117 switch (sparc_cpu)
10119 case PROCESSOR_SUPERSPARC:
10120 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10121 break;
10122 case PROCESSOR_HYPERSPARC:
10123 case PROCESSOR_SPARCLITE86X:
10124 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10125 break;
10126 default:
10127 break;
10129 return cost;
10132 static void
10133 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10134 int sched_verbose ATTRIBUTE_UNUSED,
10135 int max_ready ATTRIBUTE_UNUSED)
10138 static int
10139 sparc_use_sched_lookahead (void)
10141 if (sparc_cpu == PROCESSOR_NIAGARA
10142 || sparc_cpu == PROCESSOR_NIAGARA2
10143 || sparc_cpu == PROCESSOR_NIAGARA3)
10144 return 0;
10145 if (sparc_cpu == PROCESSOR_NIAGARA4
10146 || sparc_cpu == PROCESSOR_NIAGARA7
10147 || sparc_cpu == PROCESSOR_M8)
10148 return 2;
10149 if (sparc_cpu == PROCESSOR_ULTRASPARC
10150 || sparc_cpu == PROCESSOR_ULTRASPARC3)
10151 return 4;
10152 if ((1 << sparc_cpu) &
10153 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
10154 (1 << PROCESSOR_SPARCLITE86X)))
10155 return 3;
10156 return 0;
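/* The "(1 << sparc_cpu) & mask" test above is a compact membership
   check over the processor enum; it relies on every PROCESSOR_* value
   being smaller than the width of int and is equivalent to

     sparc_cpu == PROCESSOR_SUPERSPARC
     || sparc_cpu == PROCESSOR_HYPERSPARC
     || sparc_cpu == PROCESSOR_SPARCLITE86X  */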
10159 static int
10160 sparc_issue_rate (void)
10162 switch (sparc_cpu)
10164 case PROCESSOR_NIAGARA:
10165 case PROCESSOR_NIAGARA2:
10166 case PROCESSOR_NIAGARA3:
10167 default:
10168 return 1;
10169 case PROCESSOR_NIAGARA4:
10170 case PROCESSOR_NIAGARA7:
10171 case PROCESSOR_V9:
10172 /* Assume V9 processors are capable of at least dual-issue. */
10173 return 2;
10174 case PROCESSOR_SUPERSPARC:
10175 return 3;
10176 case PROCESSOR_HYPERSPARC:
10177 case PROCESSOR_SPARCLITE86X:
10178 return 2;
10179 case PROCESSOR_ULTRASPARC:
10180 case PROCESSOR_ULTRASPARC3:
10181 case PROCESSOR_M8:
10182 return 4;
10186 static int
10187 set_extends (rtx_insn *insn)
10189 register rtx pat = PATTERN (insn);
10191 switch (GET_CODE (SET_SRC (pat)))
10193 /* Load and some shift instructions zero extend. */
10194 case MEM:
10195 case ZERO_EXTEND:
10196 /* sethi clears the high bits */
10197 case HIGH:
10198 /* LO_SUM is used with sethi. sethi cleared the high
10199 bits and the values used with lo_sum are positive */
10200 case LO_SUM:
10201 /* Store flag stores 0 or 1 */
10202 case LT: case LTU:
10203 case GT: case GTU:
10204 case LE: case LEU:
10205 case GE: case GEU:
10206 case EQ:
10207 case NE:
10208 return 1;
10209 case AND:
10211 rtx op0 = XEXP (SET_SRC (pat), 0);
10212 rtx op1 = XEXP (SET_SRC (pat), 1);
10213 if (GET_CODE (op1) == CONST_INT)
10214 return INTVAL (op1) >= 0;
10215 if (GET_CODE (op0) != REG)
10216 return 0;
10217 if (sparc_check_64 (op0, insn) == 1)
10218 return 1;
10219 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10221 case IOR:
10222 case XOR:
10224 rtx op0 = XEXP (SET_SRC (pat), 0);
10225 rtx op1 = XEXP (SET_SRC (pat), 1);
10226 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10227 return 0;
10228 if (GET_CODE (op1) == CONST_INT)
10229 return INTVAL (op1) >= 0;
10230 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10232 case LSHIFTRT:
10233 return GET_MODE (SET_SRC (pat)) == SImode;
10234 /* Positive integers leave the high bits zero. */
10235 case CONST_INT:
10236 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10237 case ASHIFTRT:
10238 case SIGN_EXTEND:
10239 return - (GET_MODE (SET_SRC (pat)) == SImode);
10240 case REG:
10241 return sparc_check_64 (SET_SRC (pat), insn);
10242 default:
10243 return 0;
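/* Examples (annotation only): "(set (reg:SI) (mem:SI ...))" reports 1,
   since SImode loads clear the upper half of a 64-bit register;
   "(set (reg:SI) (ashiftrt:SI ...))" reports -1, since the V9 "sra"
   sign-extends its 32-bit result; anything unrecognized reports 0,
   i.e. "unknown".  */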
10247 /* We _ought_ to have only one kind per function, but... */
10248 static GTY(()) rtx sparc_addr_diff_list;
10249 static GTY(()) rtx sparc_addr_list;
10251 void
10252 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10254 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10255 if (diff)
10256 sparc_addr_diff_list
10257 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10258 else
10259 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10262 static void
10263 sparc_output_addr_vec (rtx vec)
10265 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10266 int idx, vlen = XVECLEN (body, 0);
10268 #ifdef ASM_OUTPUT_ADDR_VEC_START
10269 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10270 #endif
10272 #ifdef ASM_OUTPUT_CASE_LABEL
10273 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10274 NEXT_INSN (lab));
10275 #else
10276 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10277 #endif
10279 for (idx = 0; idx < vlen; idx++)
10281 ASM_OUTPUT_ADDR_VEC_ELT
10282 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10285 #ifdef ASM_OUTPUT_ADDR_VEC_END
10286 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10287 #endif
10290 static void
10291 sparc_output_addr_diff_vec (rtx vec)
10293 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10294 rtx base = XEXP (XEXP (body, 0), 0);
10295 int idx, vlen = XVECLEN (body, 1);
10297 #ifdef ASM_OUTPUT_ADDR_VEC_START
10298 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10299 #endif
10301 #ifdef ASM_OUTPUT_CASE_LABEL
10302 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10303 NEXT_INSN (lab));
10304 #else
10305 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10306 #endif
10308 for (idx = 0; idx < vlen; idx++)
10310 ASM_OUTPUT_ADDR_DIFF_ELT
10311 (asm_out_file,
10312 body,
10313 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10314 CODE_LABEL_NUMBER (base));
10317 #ifdef ASM_OUTPUT_ADDR_VEC_END
10318 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10319 #endif
10322 static void
10323 sparc_output_deferred_case_vectors (void)
10325 rtx t;
10326 int align;
10328 if (sparc_addr_list == NULL_RTX
10329 && sparc_addr_diff_list == NULL_RTX)
10330 return;
10332 /* Align to cache line in the function's code section. */
10333 switch_to_section (current_function_section ());
10335 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10336 if (align > 0)
10337 ASM_OUTPUT_ALIGN (asm_out_file, align);
10339 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10340 sparc_output_addr_vec (XEXP (t, 0));
10341 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10342 sparc_output_addr_diff_vec (XEXP (t, 0));
10344 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10347 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10348 unknown. Return 1 if the high bits are zero, -1 if the register is
10349 sign extended. */
10350 int
10351 sparc_check_64 (rtx x, rtx_insn *insn)
10353 /* If a register is set only once it is safe to ignore insns this
10354 code does not know how to handle. The loop will either recognize
10355 the single set and return the correct value or fail to recognize
10356 it and return 0. */
10357 int set_once = 0;
10358 rtx y = x;
10360 gcc_assert (GET_CODE (x) == REG);
10362 if (GET_MODE (x) == DImode)
10363 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10365 if (flag_expensive_optimizations
10366 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10367 set_once = 1;
10369 if (insn == 0)
10371 if (set_once)
10372 insn = get_last_insn_anywhere ();
10373 else
10374 return 0;
10377 while ((insn = PREV_INSN (insn)))
10379 switch (GET_CODE (insn))
10381 case JUMP_INSN:
10382 case NOTE:
10383 break;
10384 case CODE_LABEL:
10385 case CALL_INSN:
10386 default:
10387 if (! set_once)
10388 return 0;
10389 break;
10390 case INSN:
10392 rtx pat = PATTERN (insn);
10393 if (GET_CODE (pat) != SET)
10394 return 0;
10395 if (rtx_equal_p (x, SET_DEST (pat)))
10396 return set_extends (insn);
10397 if (y && rtx_equal_p (y, SET_DEST (pat)))
10398 return set_extends (insn);
10399 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10400 return 0;
10404 return 0;
10407 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10408 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10410 const char *
10411 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10413 static char asm_code[60];
10415 /* The scratch register is only required when the destination
10416 register is not a 64-bit global or out register. */
10417 if (which_alternative != 2)
10418 operands[3] = operands[0];
10420 /* We can only shift by constants <= 63. */
10421 if (GET_CODE (operands[2]) == CONST_INT)
10422 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10424 if (GET_CODE (operands[1]) == CONST_INT)
10426 output_asm_insn ("mov\t%1, %3", operands);
10428 else
10430 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10431 if (sparc_check_64 (operands[1], insn) <= 0)
10432 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10433 output_asm_insn ("or\t%L1, %3, %3", operands);
10436 strcpy (asm_code, opcode);
10438 if (which_alternative != 2)
10439 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10440 else
10441 return
10442 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
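/* Illustration (register names are ours, for exposition): for "sllx"
   with the destination in the %o0/%o1 pair, the source in %o2/%o3 and
   the count in %o4, alternative 0 emits roughly

     sllx  %o2, 32, %o0    ! high word of source to upper half
     srl   %o3, 0, %o3     ! zero-extend low word if not known clean
     or    %o3, %o0, %o0   ! full 64-bit source in one register
     sllx  %o0, %o4, %o1   ! the shift proper
     srlx  %o1, 32, %o0    ! split the result back into the pair  */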
10445 /* Output rtl to increment the profiler label LABELNO
10446 for profiling a function entry. */
10448 void
10449 sparc_profile_hook (int labelno)
10451 char buf[32];
10452 rtx lab, fun;
10454 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10455 if (NO_PROFILE_COUNTERS)
10457 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10459 else
10461 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10462 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10463 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10467 #ifdef TARGET_SOLARIS
10468 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10470 static void
10471 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10472 tree decl ATTRIBUTE_UNUSED)
10474 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10476 solaris_elf_asm_comdat_section (name, flags, decl);
10477 return;
10480 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10482 if (!(flags & SECTION_DEBUG))
10483 fputs (",#alloc", asm_out_file);
10484 if (flags & SECTION_WRITE)
10485 fputs (",#write", asm_out_file);
10486 if (flags & SECTION_TLS)
10487 fputs (",#tls", asm_out_file);
10488 if (flags & SECTION_CODE)
10489 fputs (",#execinstr", asm_out_file);
10491 if (flags & SECTION_NOTYPE)
10492 ;
10493 else if (flags & SECTION_BSS)
10494 fputs (",#nobits", asm_out_file);
10495 else
10496 fputs (",#progbits", asm_out_file);
10498 fputc ('\n', asm_out_file);
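/* Example of the resulting directive (illustrative): a writable data
   section named ".my_data" comes out as

     .section  ".my_data",#alloc,#write,#progbits

   i.e. the Solaris "#flag" spelling instead of the usual ELF "aw"
   string.  */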
10500 #endif /* TARGET_SOLARIS */
10502 /* We do not allow indirect calls to be optimized into sibling calls.
10504 We cannot use sibling calls when delayed branches are disabled
10505 because they will likely require the call delay slot to be filled.
10507 Also, on SPARC 32-bit we cannot emit a sibling call when the
10508 current function returns a structure. This is because the "unimp
10509 after call" convention would cause the callee to return to the
10510 wrong place. The generic code already disallows cases where the
10511 function being called returns a structure.
10513 It may seem strange how this last case could occur. Usually there
10514 is code after the call which jumps to epilogue code which dumps the
10515 return value into the struct return area. That ought to invalidate
10516 the sibling call right? Well, in the C++ case we can end up passing
10517 the pointer to the struct return area to a constructor (which returns
10518 void) and then nothing else happens. Such a sibling call would look
10519 valid without the added check here.
10521 VxWorks PIC PLT entries require the global pointer to be initialized
10522 on entry. We therefore can't emit sibling calls to them. */
10523 static bool
10524 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10526 return (decl
10527 && flag_delayed_branch
10528 && (TARGET_ARCH64 || ! cfun->returns_struct)
10529 && !(TARGET_VXWORKS_RTP
10530 && flag_pic
10531 && !targetm.binds_local_p (decl)));
10534 /* libfunc renaming. */
10536 static void
10537 sparc_init_libfuncs (void)
10539 if (TARGET_ARCH32)
10541 /* Use the subroutines that Sun's library provides for integer
10542 multiply and divide. The `*' prevents an underscore from
10543 being prepended by the compiler. .umul is a little faster
10544 than .mul. */
10545 set_optab_libfunc (smul_optab, SImode, "*.umul");
10546 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10547 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10548 set_optab_libfunc (smod_optab, SImode, "*.rem");
10549 set_optab_libfunc (umod_optab, SImode, "*.urem");
10551 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10552 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10553 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10554 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10555 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10556 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10558 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10559 is because with soft-float, the SFmode and DFmode sqrt
10560 instructions will be absent, and the compiler will notice and
10561 try to use the TFmode sqrt instruction for calls to the
10562 builtin function sqrt, but this fails. */
10563 if (TARGET_FPU)
10564 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10566 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10567 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10568 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10569 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10570 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10571 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10573 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10574 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10575 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10576 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10578 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10579 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10580 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10581 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10583 if (DITF_CONVERSION_LIBFUNCS)
10585 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10586 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10587 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10588 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10591 if (SUN_CONVERSION_LIBFUNCS)
10593 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10594 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10595 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10596 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10599 if (TARGET_ARCH64)
10601 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10602 do not exist in the library. Make sure the compiler does not
10603 emit calls to them by accident. (It should always use the
10604 hardware instructions.) */
10605 set_optab_libfunc (smul_optab, SImode, 0);
10606 set_optab_libfunc (sdiv_optab, SImode, 0);
10607 set_optab_libfunc (udiv_optab, SImode, 0);
10608 set_optab_libfunc (smod_optab, SImode, 0);
10609 set_optab_libfunc (umod_optab, SImode, 0);
10611 if (SUN_INTEGER_MULTIPLY_64)
10613 set_optab_libfunc (smul_optab, DImode, "__mul64");
10614 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10615 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10616 set_optab_libfunc (smod_optab, DImode, "__rem64");
10617 set_optab_libfunc (umod_optab, DImode, "__urem64");
10620 if (SUN_CONVERSION_LIBFUNCS)
10622 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10623 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10624 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10625 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
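/* Net effect, by way of example (ours): in 32-bit mode a signed
   SImode division lowers to a call to Sun's ".div" (no leading
   underscore, thanks to the "*"), and with SUN_CONVERSION_LIBFUNCS a
   float-to-long-long conversion calls "__ftoll"; in 64-bit mode the
   SImode entries are nulled out so no such call can ever be
   emitted.  */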
10630 /* SPARC builtins. */
10631 enum sparc_builtins
10633 /* FPU builtins. */
10634 SPARC_BUILTIN_LDFSR,
10635 SPARC_BUILTIN_STFSR,
10637 /* VIS 1.0 builtins. */
10638 SPARC_BUILTIN_FPACK16,
10639 SPARC_BUILTIN_FPACK32,
10640 SPARC_BUILTIN_FPACKFIX,
10641 SPARC_BUILTIN_FEXPAND,
10642 SPARC_BUILTIN_FPMERGE,
10643 SPARC_BUILTIN_FMUL8X16,
10644 SPARC_BUILTIN_FMUL8X16AU,
10645 SPARC_BUILTIN_FMUL8X16AL,
10646 SPARC_BUILTIN_FMUL8SUX16,
10647 SPARC_BUILTIN_FMUL8ULX16,
10648 SPARC_BUILTIN_FMULD8SUX16,
10649 SPARC_BUILTIN_FMULD8ULX16,
10650 SPARC_BUILTIN_FALIGNDATAV4HI,
10651 SPARC_BUILTIN_FALIGNDATAV8QI,
10652 SPARC_BUILTIN_FALIGNDATAV2SI,
10653 SPARC_BUILTIN_FALIGNDATADI,
10654 SPARC_BUILTIN_WRGSR,
10655 SPARC_BUILTIN_RDGSR,
10656 SPARC_BUILTIN_ALIGNADDR,
10657 SPARC_BUILTIN_ALIGNADDRL,
10658 SPARC_BUILTIN_PDIST,
10659 SPARC_BUILTIN_EDGE8,
10660 SPARC_BUILTIN_EDGE8L,
10661 SPARC_BUILTIN_EDGE16,
10662 SPARC_BUILTIN_EDGE16L,
10663 SPARC_BUILTIN_EDGE32,
10664 SPARC_BUILTIN_EDGE32L,
10665 SPARC_BUILTIN_FCMPLE16,
10666 SPARC_BUILTIN_FCMPLE32,
10667 SPARC_BUILTIN_FCMPNE16,
10668 SPARC_BUILTIN_FCMPNE32,
10669 SPARC_BUILTIN_FCMPGT16,
10670 SPARC_BUILTIN_FCMPGT32,
10671 SPARC_BUILTIN_FCMPEQ16,
10672 SPARC_BUILTIN_FCMPEQ32,
10673 SPARC_BUILTIN_FPADD16,
10674 SPARC_BUILTIN_FPADD16S,
10675 SPARC_BUILTIN_FPADD32,
10676 SPARC_BUILTIN_FPADD32S,
10677 SPARC_BUILTIN_FPSUB16,
10678 SPARC_BUILTIN_FPSUB16S,
10679 SPARC_BUILTIN_FPSUB32,
10680 SPARC_BUILTIN_FPSUB32S,
10681 SPARC_BUILTIN_ARRAY8,
10682 SPARC_BUILTIN_ARRAY16,
10683 SPARC_BUILTIN_ARRAY32,
10685 /* VIS 2.0 builtins. */
10686 SPARC_BUILTIN_EDGE8N,
10687 SPARC_BUILTIN_EDGE8LN,
10688 SPARC_BUILTIN_EDGE16N,
10689 SPARC_BUILTIN_EDGE16LN,
10690 SPARC_BUILTIN_EDGE32N,
10691 SPARC_BUILTIN_EDGE32LN,
10692 SPARC_BUILTIN_BMASK,
10693 SPARC_BUILTIN_BSHUFFLEV4HI,
10694 SPARC_BUILTIN_BSHUFFLEV8QI,
10695 SPARC_BUILTIN_BSHUFFLEV2SI,
10696 SPARC_BUILTIN_BSHUFFLEDI,
10698 /* VIS 3.0 builtins. */
10699 SPARC_BUILTIN_CMASK8,
10700 SPARC_BUILTIN_CMASK16,
10701 SPARC_BUILTIN_CMASK32,
10702 SPARC_BUILTIN_FCHKSM16,
10703 SPARC_BUILTIN_FSLL16,
10704 SPARC_BUILTIN_FSLAS16,
10705 SPARC_BUILTIN_FSRL16,
10706 SPARC_BUILTIN_FSRA16,
10707 SPARC_BUILTIN_FSLL32,
10708 SPARC_BUILTIN_FSLAS32,
10709 SPARC_BUILTIN_FSRL32,
10710 SPARC_BUILTIN_FSRA32,
10711 SPARC_BUILTIN_PDISTN,
10712 SPARC_BUILTIN_FMEAN16,
10713 SPARC_BUILTIN_FPADD64,
10714 SPARC_BUILTIN_FPSUB64,
10715 SPARC_BUILTIN_FPADDS16,
10716 SPARC_BUILTIN_FPADDS16S,
10717 SPARC_BUILTIN_FPSUBS16,
10718 SPARC_BUILTIN_FPSUBS16S,
10719 SPARC_BUILTIN_FPADDS32,
10720 SPARC_BUILTIN_FPADDS32S,
10721 SPARC_BUILTIN_FPSUBS32,
10722 SPARC_BUILTIN_FPSUBS32S,
10723 SPARC_BUILTIN_FUCMPLE8,
10724 SPARC_BUILTIN_FUCMPNE8,
10725 SPARC_BUILTIN_FUCMPGT8,
10726 SPARC_BUILTIN_FUCMPEQ8,
10727 SPARC_BUILTIN_FHADDS,
10728 SPARC_BUILTIN_FHADDD,
10729 SPARC_BUILTIN_FHSUBS,
10730 SPARC_BUILTIN_FHSUBD,
10731 SPARC_BUILTIN_FNHADDS,
10732 SPARC_BUILTIN_FNHADDD,
10733 SPARC_BUILTIN_UMULXHI,
10734 SPARC_BUILTIN_XMULX,
10735 SPARC_BUILTIN_XMULXHI,
10737 /* VIS 4.0 builtins. */
10738 SPARC_BUILTIN_FPADD8,
10739 SPARC_BUILTIN_FPADDS8,
10740 SPARC_BUILTIN_FPADDUS8,
10741 SPARC_BUILTIN_FPADDUS16,
10742 SPARC_BUILTIN_FPCMPLE8,
10743 SPARC_BUILTIN_FPCMPGT8,
10744 SPARC_BUILTIN_FPCMPULE16,
10745 SPARC_BUILTIN_FPCMPUGT16,
10746 SPARC_BUILTIN_FPCMPULE32,
10747 SPARC_BUILTIN_FPCMPUGT32,
10748 SPARC_BUILTIN_FPMAX8,
10749 SPARC_BUILTIN_FPMAX16,
10750 SPARC_BUILTIN_FPMAX32,
10751 SPARC_BUILTIN_FPMAXU8,
10752 SPARC_BUILTIN_FPMAXU16,
10753 SPARC_BUILTIN_FPMAXU32,
10754 SPARC_BUILTIN_FPMIN8,
10755 SPARC_BUILTIN_FPMIN16,
10756 SPARC_BUILTIN_FPMIN32,
10757 SPARC_BUILTIN_FPMINU8,
10758 SPARC_BUILTIN_FPMINU16,
10759 SPARC_BUILTIN_FPMINU32,
10760 SPARC_BUILTIN_FPSUB8,
10761 SPARC_BUILTIN_FPSUBS8,
10762 SPARC_BUILTIN_FPSUBUS8,
10763 SPARC_BUILTIN_FPSUBUS16,
10765 /* VIS 4.0B builtins. */
10767 /* Note that all the DICTUNPACK* entries should be kept
10768 contiguous. */
10769 SPARC_BUILTIN_FIRST_DICTUNPACK,
10770 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10771 SPARC_BUILTIN_DICTUNPACK16,
10772 SPARC_BUILTIN_DICTUNPACK32,
10773 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10775 /* Note that all the FPCMP*SHL entries should be kept
10776 contiguous. */
10777 SPARC_BUILTIN_FIRST_FPCMPSHL,
10778 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10779 SPARC_BUILTIN_FPCMPGT8SHL,
10780 SPARC_BUILTIN_FPCMPEQ8SHL,
10781 SPARC_BUILTIN_FPCMPNE8SHL,
10782 SPARC_BUILTIN_FPCMPLE16SHL,
10783 SPARC_BUILTIN_FPCMPGT16SHL,
10784 SPARC_BUILTIN_FPCMPEQ16SHL,
10785 SPARC_BUILTIN_FPCMPNE16SHL,
10786 SPARC_BUILTIN_FPCMPLE32SHL,
10787 SPARC_BUILTIN_FPCMPGT32SHL,
10788 SPARC_BUILTIN_FPCMPEQ32SHL,
10789 SPARC_BUILTIN_FPCMPNE32SHL,
10790 SPARC_BUILTIN_FPCMPULE8SHL,
10791 SPARC_BUILTIN_FPCMPUGT8SHL,
10792 SPARC_BUILTIN_FPCMPULE16SHL,
10793 SPARC_BUILTIN_FPCMPUGT16SHL,
10794 SPARC_BUILTIN_FPCMPULE32SHL,
10795 SPARC_BUILTIN_FPCMPUGT32SHL,
10796 SPARC_BUILTIN_FPCMPDE8SHL,
10797 SPARC_BUILTIN_FPCMPDE16SHL,
10798 SPARC_BUILTIN_FPCMPDE32SHL,
10799 SPARC_BUILTIN_FPCMPUR8SHL,
10800 SPARC_BUILTIN_FPCMPUR16SHL,
10801 SPARC_BUILTIN_FPCMPUR32SHL,
10802 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10804 SPARC_BUILTIN_MAX
10807 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10808 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10810 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10811 The instruction should require a constant operand of some sort. The
10812 function prints an error if OPVAL is not valid. */
10814 static int
10815 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10817 if (GET_CODE (opval) != CONST_INT)
10819 error ("%qs expects a constant argument", insn_data[icode].name);
10820 return false;
10823 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10825 error ("constant argument out of range for %qs", insn_data[icode].name);
10826 return false;
10828 return true;
10831 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10832 function decl or NULL_TREE if the builtin was not added. */
10834 static tree
10835 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10836 tree type)
10838 tree t
10839 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10841 if (t)
10843 sparc_builtins[code] = t;
10844 sparc_builtins_icode[code] = icode;
10847 return t;
10850 /* Likewise, but also marks the function as "const". */
10852 static tree
10853 def_builtin_const (const char *name, enum insn_code icode,
10854 enum sparc_builtins code, tree type)
10856 tree t = def_builtin (name, icode, code, type);
10858 if (t)
10859 TREE_READONLY (t) = 1;
10861 return t;
10864 /* Implement the TARGET_INIT_BUILTINS target hook.
10865 Create builtin functions for special SPARC instructions. */
10867 static void
10868 sparc_init_builtins (void)
10870 if (TARGET_FPU)
10871 sparc_fpu_init_builtins ();
10873 if (TARGET_VIS)
10874 sparc_vis_init_builtins ();
10877 /* Create builtin functions for FPU instructions. */
10879 static void
10880 sparc_fpu_init_builtins (void)
10882 tree ftype
10883 = build_function_type_list (void_type_node,
10884 build_pointer_type (unsigned_type_node), 0);
10885 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10886 SPARC_BUILTIN_LDFSR, ftype);
10887 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10888 SPARC_BUILTIN_STFSR, ftype);
10891 /* Create builtin functions for VIS instructions. */
10893 static void
10894 sparc_vis_init_builtins (void)
10896 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10897 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10898 tree v4hi = build_vector_type (intHI_type_node, 4);
10899 tree v2hi = build_vector_type (intHI_type_node, 2);
10900 tree v2si = build_vector_type (intSI_type_node, 2);
10901 tree v1si = build_vector_type (intSI_type_node, 1);
10903 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10904 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10905 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10906 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10907 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10908 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10909 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10910 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10911 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10912 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10913 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10914 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10915 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10916 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10917 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10918 v8qi, v8qi,
10919 intDI_type_node, 0);
10920 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10921 v8qi, v8qi, 0);
10922 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10923 v8qi, v8qi, 0);
10924 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10925 intSI_type_node, 0);
10926 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10927 intSI_type_node, 0);
10928 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10929 intSI_type_node, 0);
10930 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10931 intDI_type_node,
10932 intDI_type_node, 0);
10933 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10934 intSI_type_node,
10935 intSI_type_node, 0);
10936 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10937 ptr_type_node,
10938 intSI_type_node, 0);
10939 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10940 ptr_type_node,
10941 intDI_type_node, 0);
10942 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10943 ptr_type_node,
10944 ptr_type_node, 0);
10945 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10946 ptr_type_node,
10947 ptr_type_node, 0);
10948 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10949 v4hi, v4hi, 0);
10950 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10951 v2si, v2si, 0);
10952 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10953 v4hi, v4hi, 0);
10954 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10955 v2si, v2si, 0);
10956 tree void_ftype_di = build_function_type_list (void_type_node,
10957 intDI_type_node, 0);
10958 tree di_ftype_void = build_function_type_list (intDI_type_node,
10959 void_type_node, 0);
10960 tree void_ftype_si = build_function_type_list (void_type_node,
10961 intSI_type_node, 0);
10962 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10963 float_type_node,
10964 float_type_node, 0);
10965 tree df_ftype_df_df = build_function_type_list (double_type_node,
10966 double_type_node,
10967 double_type_node, 0);
10969 /* Packing and expanding vectors. */
10970 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10971 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10972 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10973 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10974 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10975 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10976 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10977 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10978 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10979 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10981 /* Multiplications. */
10982 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10983 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10984 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10985 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10986 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10987 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10988 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10989 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10990 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10991 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10992 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10993 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10994 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10995 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10997 /* Data aligning. */
10998 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10999 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11000 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11001 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11002 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11003 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11004 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11005 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11007 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11008 SPARC_BUILTIN_WRGSR, void_ftype_di);
11009 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11010 SPARC_BUILTIN_RDGSR, di_ftype_void);
11012 if (TARGET_ARCH64)
11014 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11015 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11016 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11017 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11019 else
11021 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11022 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11023 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11024 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11027 /* Pixel distance. */
11028 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11029 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
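/* VIS semantics, for reference: pdist accumulates the sum of absolute
   differences of the eight byte pairs into its 64-bit third operand,
   e.g. (typedef ours)

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));
     acc = __builtin_vis_pdist (a, b, acc);

   computes acc += |a[0]-b[0]| + ... + |a[7]-b[7]|.  */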
11031 /* Edge handling. */
11032 if (TARGET_ARCH64)
11034 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11035 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11036 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11037 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11038 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11039 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11040 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11041 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11042 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11043 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11044 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11045 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11047 else
11049 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11050 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11051 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11052 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11053 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11054 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11055 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11056 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11057 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11058 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11059 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11060 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11063 /* Pixel compare. */
11064 if (TARGET_ARCH64)
11066 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11067 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11068 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11069 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11070 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11071 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11072 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11073 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11074 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11075 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11076 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11077 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11078 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11079 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11080 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11081 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11083 else
11085 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11086 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11087 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11088 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11089 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11090 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11091 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11092 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11093 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11094 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11095 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11096 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11097 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11098 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11099 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11100 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11103 /* Addition and subtraction. */
11104 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11105 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11106 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11107 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11108 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11109 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11110 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11111 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11112 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11113 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11114 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11115 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11116 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11117 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11118 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11119 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
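/* Illustrative usage of the partitioned add/subtract builtins above;
   a minimal sketch assuming the GNU vector_size extension (the
   typedef and function name are ours, not part of any API).  */
#if 0
typedef short v4hi __attribute__ ((vector_size (8)));

static v4hi
add_pixels (v4hi a, v4hi b)
{
  return __builtin_vis_fpadd16 (a, b);  /* a single fpadd16 insn */
}
#endif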
11121 /* Three-dimensional array addressing. */
11122 if (TARGET_ARCH64)
11124 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11125 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11126 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11127 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11128 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11129 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11131 else
11133 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11134 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11135 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11136 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11137 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11138 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11141 if (TARGET_VIS2)
11143 /* Edge handling. */
11144 if (TARGET_ARCH64)
11146 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11147 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11148 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11149 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11150 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11151 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11152 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11153 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11154 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11155 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11156 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11157 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11159 else
11161 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11162 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11163 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11164 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11165 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11166 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11167 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11168 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11169 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11170 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11171 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11172 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11175 /* Byte mask and shuffle. */
11176 if (TARGET_ARCH64)
11177 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11178 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11179 else
11180 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11181 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11182 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11183 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11184 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11185 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11186 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11187 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11188 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11189 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11192 if (TARGET_VIS3)
11194 if (TARGET_ARCH64)
11196 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11197 SPARC_BUILTIN_CMASK8, void_ftype_di);
11198 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11199 SPARC_BUILTIN_CMASK16, void_ftype_di);
11200 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11201 SPARC_BUILTIN_CMASK32, void_ftype_di);
11203 else
11205 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11206 SPARC_BUILTIN_CMASK8, void_ftype_si);
11207 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11208 SPARC_BUILTIN_CMASK16, void_ftype_si);
11209 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11210 SPARC_BUILTIN_CMASK32, void_ftype_si);
11213 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11214 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11216 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11217 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11218 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11219 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11220 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11221 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11222 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11223 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11224 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11225 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11226 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11227 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11228 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11229 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11230 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11231 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11233 if (TARGET_ARCH64)
11234 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11235 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11236 else
11237 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11238 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11240 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11241 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11242 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11243 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11244 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11245 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11247 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11248 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11249 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11250 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11251 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11252 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11253 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11254 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11255 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11256 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11257 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11258 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11259 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11260 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11261 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11262 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11264 if (TARGET_ARCH64)
11266 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11267 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11268 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11269 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11270 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11271 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11272 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11273 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11275 else
11277 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11278 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11279 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11280 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11281 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11282 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11283 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11284 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11287 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11288 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11289 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11290 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11291 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11292 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11293 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11294 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11295 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11296 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11297 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11298 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11300 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11301 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11302 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11303 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11304 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11305 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11308 if (TARGET_VIS4)
11310 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11311 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11312 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11313 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11314 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11315 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11316 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11317 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11320 if (TARGET_ARCH64)
11322 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11323 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11324 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11325 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11326 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11327 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11328 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11329 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11330 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11331 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11332 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11333 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11335 else
11337 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11338 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11339 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11340 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11341 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11342 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11343 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11344 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11345 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11346 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11347 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11348 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11351 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11352 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11353 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11354 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11355 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11356 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11357 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11358 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11359 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11360 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11361 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11362 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11363 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11364 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11365 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11366 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11367 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11368 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11369 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11370 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11371 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11372 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11373 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11374 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11375 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11376 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11377 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11378 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11379 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11380 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11381 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11382 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11385 if (TARGET_VIS4B)
11387 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11388 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11389 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11390 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11391 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11392 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11394 if (TARGET_ARCH64)
11396 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11397 v8qi, v8qi,
11398 intSI_type_node, 0);
11399 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11400 v4hi, v4hi,
11401 intSI_type_node, 0);
11402 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11403 v2si, v2si,
11404 intSI_type_node, 0);
11406 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11407 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11408 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11409 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11410 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11411 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11412 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11413 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11415 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11416 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11417 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11418 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11419 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11420 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11421 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11422 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11424 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11425 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11426 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11427 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11428 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11429 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11430 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11431 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11434 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11435 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11436 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11437 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11439 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11440 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11441 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11442 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11444 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11445 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11446 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11447 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11449 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11450 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11451 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11452 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11453 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11454 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11456 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11457 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11458 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11459 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11460 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11461 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11464 else
11466 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11467 v8qi, v8qi,
11468 intSI_type_node, 0);
11469 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11470 v4hi, v4hi,
11471 intSI_type_node, 0);
11472 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11473 v2si, v2si,
11474 intSI_type_node, 0);
11476 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11477 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11478 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11479 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11480 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11481 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11482 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11483 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11485 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11486 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11487 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11488 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11489 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11490 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11491 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11492 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11494 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11495 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11496 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11497 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11498 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11499 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11500 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11501 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11504 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11505 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11506 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11507 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11509 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11510 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11511 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11512 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11514 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11515 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11516 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11517 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11519 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11520 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11521 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11522 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11523 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11524 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11526 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11527 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11528 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11529 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11530 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11531 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11536 /* Implement TARGET_BUILTIN_DECL hook. */
11538 static tree
11539 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11541 if (code >= SPARC_BUILTIN_MAX)
11542 return error_mark_node;
11544 return sparc_builtins[code];
11547 /* Implement TARGET_EXPAND_BUILTIN hook. */
11549 static rtx
11550 sparc_expand_builtin (tree exp, rtx target,
11551 rtx subtarget ATTRIBUTE_UNUSED,
11552 machine_mode tmode ATTRIBUTE_UNUSED,
11553 int ignore ATTRIBUTE_UNUSED)
11555 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11556 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11557 enum insn_code icode = sparc_builtins_icode[code];
11558 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11559 call_expr_arg_iterator iter;
11560 int arg_count = 0;
11561 rtx pat, op[4];
11562 tree arg;
11564 if (nonvoid)
11566 machine_mode tmode = insn_data[icode].operand[0].mode;
11567 if (!target
11568 || GET_MODE (target) != tmode
11569 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11570 op[0] = gen_reg_rtx (tmode);
11571 else
11572 op[0] = target;
11575 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11577 const struct insn_operand_data *insn_op;
11578 int idx;
11580 if (arg == error_mark_node)
11581 return NULL_RTX;
11583 arg_count++;
11584 idx = arg_count - !nonvoid;
11585 insn_op = &insn_data[icode].operand[idx];
11586 op[arg_count] = expand_normal (arg);
11588 /* Some of the builtins require constant arguments. We check
11589 for this here. */
11590 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11591 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11592 && arg_count == 3)
11593 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11594 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11595 && arg_count == 2))
11597 if (!check_constant_argument (icode, idx, op[arg_count]))
11598 return const0_rtx;
11601 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11603 if (!address_operand (op[arg_count], SImode))
11605 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11606 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11608 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11611 else if (insn_op->mode == V1DImode
11612 && GET_MODE (op[arg_count]) == DImode)
11613 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11615 else if (insn_op->mode == V1SImode
11616 && GET_MODE (op[arg_count]) == SImode)
11617 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11619 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11620 insn_op->mode))
11621 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11624 switch (arg_count)
11626 case 0:
11627 pat = GEN_FCN (icode) (op[0]);
11628 break;
11629 case 1:
11630 if (nonvoid)
11631 pat = GEN_FCN (icode) (op[0], op[1]);
11632 else
11633 pat = GEN_FCN (icode) (op[1]);
11634 break;
11635 case 2:
11636 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11637 break;
11638 case 3:
11639 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11640 break;
11641 default:
11642 gcc_unreachable ();
11645 if (!pat)
11646 return NULL_RTX;
11648 emit_insn (pat);
11650 return (nonvoid ? op[0] : const0_rtx);
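/* As an illustrative sketch (not part of the compiler), the constant
   argument check above is what restricts the selector operand of the
   dictunpack and fpcmpshl builtins to compile-time constants, e.g.,
   assuming a VIS4B target:

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     unpack (double dict)
     {
       return __builtin_vis_dictunpack16 (dict, 3);
     }

   Here the literal 3 is mandatory; passing a variable instead is
   rejected through check_constant_argument above. */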
11653 /* Return the upper 16 bits of the 8x16 multiplication. */
11655 static int
11656 sparc_vis_mul8x16 (int e8, int e16)
11658 return (e8 * e16 + 128) / 256;
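/* For example, e8 = 100 and e16 = 300 gives
   (100 * 300 + 128) / 256 = 30128 / 256 = 117, i.e. the 24-bit
   product scaled down by 256 with rounding to nearest. */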
11661 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11662 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11664 static void
11665 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11666 tree inner_type, tree cst0, tree cst1)
11668 unsigned i, num = VECTOR_CST_NELTS (cst0);
11669 int scale;
11671 switch (fncode)
11673 case SPARC_BUILTIN_FMUL8X16:
11674 for (i = 0; i < num; ++i)
11676 int val
11677 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11678 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11679 n_elts->quick_push (build_int_cst (inner_type, val));
11681 break;
11683 case SPARC_BUILTIN_FMUL8X16AU:
11684 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11686 for (i = 0; i < num; ++i)
11688 int val
11689 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11690 scale);
11691 n_elts->quick_push (build_int_cst (inner_type, val));
11693 break;
11695 case SPARC_BUILTIN_FMUL8X16AL:
11696 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11698 for (i = 0; i < num; ++i)
11700 int val
11701 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11702 scale);
11703 n_elts->quick_push (build_int_cst (inner_type, val));
11705 break;
11707 default:
11708 gcc_unreachable ();
11712 /* Implement TARGET_FOLD_BUILTIN hook.
11714 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11715 result of the function call is ignored. NULL_TREE is returned if the
11716 function could not be folded. */
11718 static tree
11719 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11720 tree *args, bool ignore)
11722 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11723 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11724 tree arg0, arg1, arg2;
11726 if (ignore)
11727 switch (code)
11729 case SPARC_BUILTIN_LDFSR:
11730 case SPARC_BUILTIN_STFSR:
11731 case SPARC_BUILTIN_ALIGNADDR:
11732 case SPARC_BUILTIN_WRGSR:
11733 case SPARC_BUILTIN_BMASK:
11734 case SPARC_BUILTIN_CMASK8:
11735 case SPARC_BUILTIN_CMASK16:
11736 case SPARC_BUILTIN_CMASK32:
11737 break;
11739 default:
11740 return build_zero_cst (rtype);
11743 switch (code)
11745 case SPARC_BUILTIN_FEXPAND:
11746 arg0 = args[0];
11747 STRIP_NOPS (arg0);
11749 if (TREE_CODE (arg0) == VECTOR_CST)
11751 tree inner_type = TREE_TYPE (rtype);
11752 unsigned i;
11754 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11755 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11757 unsigned HOST_WIDE_INT val
11758 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11759 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11761 return n_elts.build ();
11763 break;
11765 case SPARC_BUILTIN_FMUL8X16:
11766 case SPARC_BUILTIN_FMUL8X16AU:
11767 case SPARC_BUILTIN_FMUL8X16AL:
11768 arg0 = args[0];
11769 arg1 = args[1];
11770 STRIP_NOPS (arg0);
11771 STRIP_NOPS (arg1);
11773 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11775 tree inner_type = TREE_TYPE (rtype);
11776 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11777 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11778 return n_elts.build ();
11780 break;
11782 case SPARC_BUILTIN_FPMERGE:
11783 arg0 = args[0];
11784 arg1 = args[1];
11785 STRIP_NOPS (arg0);
11786 STRIP_NOPS (arg1);
11788 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11790 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11791 unsigned i;
11792 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11794 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11795 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11798 return n_elts.build ();
11800 break;
11802 case SPARC_BUILTIN_PDIST:
11803 case SPARC_BUILTIN_PDISTN:
11804 arg0 = args[0];
11805 arg1 = args[1];
11806 STRIP_NOPS (arg0);
11807 STRIP_NOPS (arg1);
11808 if (code == SPARC_BUILTIN_PDIST)
11810 arg2 = args[2];
11811 STRIP_NOPS (arg2);
11813 else
11814 arg2 = integer_zero_node;
11816 if (TREE_CODE (arg0) == VECTOR_CST
11817 && TREE_CODE (arg1) == VECTOR_CST
11818 && TREE_CODE (arg2) == INTEGER_CST)
11820 bool overflow = false;
11821 widest_int result = wi::to_widest (arg2);
11822 widest_int tmp;
11823 unsigned i;
11825 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11827 tree e0 = VECTOR_CST_ELT (arg0, i);
11828 tree e1 = VECTOR_CST_ELT (arg1, i);
11830 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11832 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11833 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11834 if (wi::neg_p (tmp))
11835 tmp = wi::neg (tmp, &neg2_ovf);
11836 else
11837 neg2_ovf = false;
11838 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11839 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11842 gcc_assert (!overflow);
11844 return wide_int_to_tree (rtype, result);
11847 default:
11848 break;
11851 return NULL_TREE;
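/* As a concrete instance of the folding above (illustrative sketch;
   the typedef names are immaterial), a VIS call on constant operands
   such as

     typedef unsigned char v4qi __attribute__ ((vector_size (4)));
     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi x = __builtin_vis_fexpand ((v4qi) { 1, 2, 3, 4 });

   is folded at compile time to the constant vector { 16, 32, 48, 64 },
   each element being the source byte shifted left by 4 as in the
   FEXPAND case. */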
11854 /* ??? This duplicates information provided to the compiler by the
11855 ??? scheduler description. Some day, teach genautomata to output
11856 ??? the latencies and then CSE will just use that. */
11858 static bool
11859 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11860 int opno ATTRIBUTE_UNUSED,
11861 int *total, bool speed ATTRIBUTE_UNUSED)
11863 int code = GET_CODE (x);
11864 bool float_mode_p = FLOAT_MODE_P (mode);
11866 switch (code)
11868 case CONST_INT:
11869 if (SMALL_INT (x))
11870 *total = 0;
11871 else
11872 *total = 2;
11873 return true;
11875 case CONST_WIDE_INT:
11876 *total = 0;
11877 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11878 *total += 2;
11879 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11880 *total += 2;
11881 return true;
11883 case HIGH:
11884 *total = 2;
11885 return true;
11887 case CONST:
11888 case LABEL_REF:
11889 case SYMBOL_REF:
11890 *total = 4;
11891 return true;
11893 case CONST_DOUBLE:
11894 *total = 8;
11895 return true;
11897 case MEM:
11898 /* If outer-code was a sign or zero extension, a cost
11899 of COSTS_N_INSNS (1) was already added in. This is
11900 why we are subtracting it back out. */
11901 if (outer_code == ZERO_EXTEND)
11903 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11905 else if (outer_code == SIGN_EXTEND)
11907 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11909 else if (float_mode_p)
11911 *total = sparc_costs->float_load;
11913 else
11915 *total = sparc_costs->int_load;
11918 return true;
11920 case PLUS:
11921 case MINUS:
11922 if (float_mode_p)
11923 *total = sparc_costs->float_plusminus;
11924 else
11925 *total = COSTS_N_INSNS (1);
11926 return false;
11928 case FMA:
11930 rtx sub;
11932 gcc_assert (float_mode_p);
11933 *total = sparc_costs->float_mul;
11935 sub = XEXP (x, 0);
11936 if (GET_CODE (sub) == NEG)
11937 sub = XEXP (sub, 0);
11938 *total += rtx_cost (sub, mode, FMA, 0, speed);
11940 sub = XEXP (x, 2);
11941 if (GET_CODE (sub) == NEG)
11942 sub = XEXP (sub, 0);
11943 *total += rtx_cost (sub, mode, FMA, 2, speed);
11944 return true;
11947 case MULT:
11948 if (float_mode_p)
11949 *total = sparc_costs->float_mul;
11950 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11951 *total = COSTS_N_INSNS (25);
11952 else
11954 int bit_cost;
11956 bit_cost = 0;
11957 if (sparc_costs->int_mul_bit_factor)
11959 int nbits;
11961 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11963 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11964 for (nbits = 0; value != 0; value &= value - 1)
11965 nbits++;
11967 else
11968 nbits = 7;
11970 if (nbits < 3)
11971 nbits = 3;
11972 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11973 bit_cost = COSTS_N_INSNS (bit_cost);
11976 if (mode == DImode || !TARGET_HARD_MUL)
11977 *total = sparc_costs->int_mulX + bit_cost;
11978 else
11979 *total = sparc_costs->int_mul + bit_cost;
11981 return false;
11983 case ASHIFT:
11984 case ASHIFTRT:
11985 case LSHIFTRT:
11986 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11987 return false;
11989 case DIV:
11990 case UDIV:
11991 case MOD:
11992 case UMOD:
11993 if (float_mode_p)
11995 if (mode == DFmode)
11996 *total = sparc_costs->float_div_df;
11997 else
11998 *total = sparc_costs->float_div_sf;
12000 else
12002 if (mode == DImode)
12003 *total = sparc_costs->int_divX;
12004 else
12005 *total = sparc_costs->int_div;
12007 return false;
12009 case NEG:
12010 if (! float_mode_p)
12012 *total = COSTS_N_INSNS (1);
12013 return false;
12015 /* FALLTHRU */
12017 case ABS:
12018 case FLOAT:
12019 case UNSIGNED_FLOAT:
12020 case FIX:
12021 case UNSIGNED_FIX:
12022 case FLOAT_EXTEND:
12023 case FLOAT_TRUNCATE:
12024 *total = sparc_costs->float_move;
12025 return false;
12027 case SQRT:
12028 if (mode == DFmode)
12029 *total = sparc_costs->float_sqrt_df;
12030 else
12031 *total = sparc_costs->float_sqrt_sf;
12032 return false;
12034 case COMPARE:
12035 if (float_mode_p)
12036 *total = sparc_costs->float_cmp;
12037 else
12038 *total = COSTS_N_INSNS (1);
12039 return false;
12041 case IF_THEN_ELSE:
12042 if (float_mode_p)
12043 *total = sparc_costs->float_cmove;
12044 else
12045 *total = sparc_costs->int_cmove;
12046 return false;
12048 case IOR:
12049 /* Handle the NAND vector patterns. */
12050 if (sparc_vector_mode_supported_p (mode)
12051 && GET_CODE (XEXP (x, 0)) == NOT
12052 && GET_CODE (XEXP (x, 1)) == NOT)
12054 *total = COSTS_N_INSNS (1);
12055 return true;
12057 else
12058 return false;
12060 default:
12061 return false;
12065 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12067 static inline bool
12068 general_or_i64_p (reg_class_t rclass)
12070 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12073 /* Implement TARGET_REGISTER_MOVE_COST. */
12075 static int
12076 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12077 reg_class_t from, reg_class_t to)
12079 bool need_memory = false;
12081 /* This helps postreload CSE to eliminate redundant comparisons. */
12082 if (from == NO_REGS || to == NO_REGS)
12083 return 100;
12085 if (from == FPCC_REGS || to == FPCC_REGS)
12086 need_memory = true;
12087 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12088 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12090 if (TARGET_VIS3)
12092 int size = GET_MODE_SIZE (mode);
12093 if (size == 8 || size == 4)
12095 if (! TARGET_ARCH32 || size == 4)
12096 return 4;
12097 else
12098 return 6;
12101 need_memory = true;
12104 if (need_memory)
12106 if (sparc_cpu == PROCESSOR_ULTRASPARC
12107 || sparc_cpu == PROCESSOR_ULTRASPARC3
12108 || sparc_cpu == PROCESSOR_NIAGARA
12109 || sparc_cpu == PROCESSOR_NIAGARA2
12110 || sparc_cpu == PROCESSOR_NIAGARA3
12111 || sparc_cpu == PROCESSOR_NIAGARA4
12112 || sparc_cpu == PROCESSOR_NIAGARA7
12113 || sparc_cpu == PROCESSOR_M8)
12114 return 12;
12116 return 6;
12119 return 2;
12122 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12123 This is achieved by means of a manual dynamic stack space allocation in
12124 the current frame. We make the assumption that SEQ doesn't contain any
12125 function calls, with the possible exception of calls to the GOT helper. */
12127 static void
12128 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12130 /* We must preserve the lowest 16 words for the register save area. */
12131 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12132 /* We really need only 2 words of fresh stack space. */
12133 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12135 rtx slot
12136 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12137 SPARC_STACK_BIAS + offset));
12139 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12140 emit_insn (gen_rtx_SET (slot, reg));
12141 if (reg2)
12142 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12143 reg2));
12144 emit_insn (seq);
12145 if (reg2)
12146 emit_insn (gen_rtx_SET (reg2,
12147 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12148 emit_insn (gen_rtx_SET (reg, slot));
12149 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
12152 /* Output the assembler code for a thunk function. THUNK_DECL is the
12153 declaration for the thunk function itself, FUNCTION is the decl for
12154 the target function. DELTA is an immediate constant offset to be
12155 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12156 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12158 static void
12159 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12160 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12161 tree function)
12163 rtx this_rtx, funexp;
12164 rtx_insn *insn;
12165 unsigned int int_arg_first;
12167 reload_completed = 1;
12168 epilogue_completed = 1;
12170 emit_note (NOTE_INSN_PROLOGUE_END);
12172 if (TARGET_FLAT)
12174 sparc_leaf_function_p = 1;
12176 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12178 else if (flag_delayed_branch)
12180 /* We will emit a regular sibcall below, so we need to instruct
12181 output_sibcall that we are in a leaf function. */
12182 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12184 /* This will cause final.c to invoke leaf_renumber_regs so we
12185 must behave as if we were in a not-yet-leafified function. */
12186 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12188 else
12190 /* We will emit the sibcall manually below, so we will need to
12191 manually spill non-leaf registers. */
12192 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12194 /* We really are in a leaf function. */
12195 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12198 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12199 returns a structure, the structure return pointer is there instead. */
12200 if (TARGET_ARCH64
12201 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12202 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12203 else
12204 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12206 /* Add DELTA. When possible use a plain add, otherwise load it into
12207 a register first. */
12208 if (delta)
12210 rtx delta_rtx = GEN_INT (delta);
12212 if (! SPARC_SIMM13_P (delta))
12214 rtx scratch = gen_rtx_REG (Pmode, 1);
12215 emit_move_insn (scratch, delta_rtx);
12216 delta_rtx = scratch;
12219 /* THIS_RTX += DELTA. */
12220 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12223 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12224 if (vcall_offset)
12226 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12227 rtx scratch = gen_rtx_REG (Pmode, 1);
12229 gcc_assert (vcall_offset < 0);
12231 /* SCRATCH = *THIS_RTX. */
12232 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12234 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12235 may not have any available scratch register at this point. */
12236 if (SPARC_SIMM13_P (vcall_offset))
12238 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12239 else if (! fixed_regs[5]
12240 /* The below sequence is made up of at least 2 insns,
12241 while the default method may need only one. */
12242 && vcall_offset < -8192)
12244 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12245 emit_move_insn (scratch2, vcall_offset_rtx);
12246 vcall_offset_rtx = scratch2;
12248 else
12250 rtx increment = GEN_INT (-4096);
12252 /* VCALL_OFFSET is a negative number whose typical range can be
12253 estimated as -32768..0 in 32-bit mode. In almost all cases
12254 it is therefore cheaper to emit multiple add insns than
12255 spilling and loading the constant into a register (at least
12256 6 insns). */
12257 while (! SPARC_SIMM13_P (vcall_offset))
12259 emit_insn (gen_add2_insn (scratch, increment));
12260 vcall_offset += 4096;
12262 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12265 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12266 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12267 gen_rtx_PLUS (Pmode,
12268 scratch,
12269 vcall_offset_rtx)));
12271 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12272 emit_insn (gen_add2_insn (this_rtx, scratch));
12275 /* Generate a tail call to the target function. */
12276 if (! TREE_USED (function))
12278 assemble_external (function);
12279 TREE_USED (function) = 1;
12281 funexp = XEXP (DECL_RTL (function), 0);
12283 if (flag_delayed_branch)
12285 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12286 insn = emit_call_insn (gen_sibcall (funexp));
12287 SIBLING_CALL_P (insn) = 1;
12289 else
12291 /* The hoops we have to jump through in order to generate a sibcall
12292 without using delay slots... */
12293 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12295 if (flag_pic)
12297 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12298 start_sequence ();
12299 load_got_register (); /* clobbers %o7 */
12300 scratch = sparc_legitimize_pic_address (funexp, scratch);
12301 seq = get_insns ();
12302 end_sequence ();
12303 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12305 else if (TARGET_ARCH32)
12307 emit_insn (gen_rtx_SET (scratch,
12308 gen_rtx_HIGH (SImode, funexp)));
12309 emit_insn (gen_rtx_SET (scratch,
12310 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12312 else /* TARGET_ARCH64 */
12314 switch (sparc_cmodel)
12316 case CM_MEDLOW:
12317 case CM_MEDMID:
12318 /* The destination can serve as a temporary. */
12319 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12320 break;
12322 case CM_MEDANY:
12323 case CM_EMBMEDANY:
12324 /* The destination cannot serve as a temporary. */
12325 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12326 start_sequence ();
12327 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12328 seq = get_insns ();
12329 end_sequence ();
12330 emit_and_preserve (seq, spill_reg, 0);
12331 break;
12333 default:
12334 gcc_unreachable ();
12338 emit_jump_insn (gen_indirect_jump (scratch));
12341 emit_barrier ();
12343 /* Run just enough of rest_of_compilation to get the insns emitted.
12344 There's not really enough bulk here to make other passes such as
12345 instruction scheduling worthwhile. Note that use_thunk calls
12346 assemble_start_function and assemble_end_function. */
12347 insn = get_insns ();
12348 shorten_branches (insn);
12349 final_start_function (insn, file, 1);
12350 final (insn, file, 1);
12351 final_end_function ();
12353 reload_completed = 0;
12354 epilogue_completed = 0;
12357 /* Return true if sparc_output_mi_thunk would be able to output the
12358 assembler code for the thunk function specified by the arguments
12359 it is passed, and false otherwise. */
12360 static bool
12361 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12362 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12363 HOST_WIDE_INT vcall_offset,
12364 const_tree function ATTRIBUTE_UNUSED)
12366 /* Bound the loop used in the default method above. */
12367 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12370 /* How to allocate a 'struct machine_function'. */
12372 static struct machine_function *
12373 sparc_init_machine_status (void)
12375 return ggc_cleared_alloc<machine_function> ();
12378 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12379 We need to emit DTP-relative relocations. */
12381 static void
12382 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12384 switch (size)
12386 case 4:
12387 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12388 break;
12389 case 8:
12390 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12391 break;
12392 default:
12393 gcc_unreachable ();
12395 output_addr_const (file, x);
12396 fputs (")", file);
12399 /* Do whatever processing is required at the end of a file. */
12401 static void
12402 sparc_file_end (void)
12404 /* If we need to emit the special GOT helper function, do so now. */
12405 if (got_helper_rtx)
12407 const char *name = XSTR (got_helper_rtx, 0);
12408 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12409 #ifdef DWARF2_UNWIND_INFO
12410 bool do_cfi;
12411 #endif
12413 if (USE_HIDDEN_LINKONCE)
12415 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12416 get_identifier (name),
12417 build_function_type_list (void_type_node,
12418 NULL_TREE));
12419 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12420 NULL_TREE, void_type_node);
12421 TREE_PUBLIC (decl) = 1;
12422 TREE_STATIC (decl) = 1;
12423 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12424 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12425 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12426 resolve_unique_section (decl, 0, flag_function_sections);
12427 allocate_struct_function (decl, true);
12428 cfun->is_thunk = 1;
12429 current_function_decl = decl;
12430 init_varasm_status ();
12431 assemble_start_function (decl, name);
12433 else
12435 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12436 switch_to_section (text_section);
12437 if (align > 0)
12438 ASM_OUTPUT_ALIGN (asm_out_file, align);
12439 ASM_OUTPUT_LABEL (asm_out_file, name);
12442 #ifdef DWARF2_UNWIND_INFO
12443 do_cfi = dwarf2out_do_cfi_asm ();
12444 if (do_cfi)
12445 fprintf (asm_out_file, "\t.cfi_startproc\n");
12446 #endif
12447 if (flag_delayed_branch)
12448 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12449 reg_name, reg_name);
12450 else
12451 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12452 reg_name, reg_name);
12453 #ifdef DWARF2_UNWIND_INFO
12454 if (do_cfi)
12455 fprintf (asm_out_file, "\t.cfi_endproc\n");
12456 #endif
12459 if (NEED_INDICATE_EXEC_STACK)
12460 file_end_indicate_exec_stack ();
12462 #ifdef TARGET_SOLARIS
12463 solaris_file_end ();
12464 #endif
12467 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12468 /* Implement TARGET_MANGLE_TYPE. */
12470 static const char *
12471 sparc_mangle_type (const_tree type)
12473 if (TARGET_ARCH32
12474 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12475 && TARGET_LONG_DOUBLE_128)
12476 return "g";
12478 /* For all other types, use normal C++ mangling. */
12479 return NULL;
12481 #endif
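/* To illustrate: with -m32 -mlong-double-128, a declaration such as

     void f (long double);

   mangles to _Z1fg instead of the default _Z1fe, "g" being the
   Itanium C++ ABI code otherwise reserved for __float128. */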
12483 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12484 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12485 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12487 void
12488 sparc_emit_membar_for_model (enum memmodel model,
12489 int load_store, int before_after)
12491 /* Bits for the MEMBAR mmask field. */
12492 const int LoadLoad = 1;
12493 const int StoreLoad = 2;
12494 const int LoadStore = 4;
12495 const int StoreStore = 8;
12497 int mm = 0, implied = 0;
12499 switch (sparc_memory_model)
12501 case SMM_SC:
12502 /* Sequential Consistency. All memory transactions are immediately
12503 visible in sequential execution order. No barriers needed. */
12504 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12505 break;
12507 case SMM_TSO:
12508 /* Total Store Ordering: all memory transactions with store semantics
12509 are followed by an implied StoreStore. */
12510 implied |= StoreStore;
12512 /* If we're not looking for a raw barrier (before+after), then atomic
12513 operations get the benefit of being both load and store. */
12514 if (load_store == 3 && before_after == 1)
12515 implied |= StoreLoad;
12516 /* FALLTHRU */
12518 case SMM_PSO:
12519 /* Partial Store Ordering: all memory transactions with load semantics
12520 are followed by an implied LoadLoad | LoadStore. */
12521 implied |= LoadLoad | LoadStore;
12523 /* If we're not looking for a raw barrier (before+after), then atomic
12524 operations get the benefit of being both load and store. */
12525 if (load_store == 3 && before_after == 2)
12526 implied |= StoreLoad | StoreStore;
12527 /* FALLTHRU */
12529 case SMM_RMO:
12530 /* Relaxed Memory Ordering: no implicit bits. */
12531 break;
12533 default:
12534 gcc_unreachable ();
12537 if (before_after & 1)
12539 if (is_mm_release (model) || is_mm_acq_rel (model)
12540 || is_mm_seq_cst (model))
12542 if (load_store & 1)
12543 mm |= LoadLoad | StoreLoad;
12544 if (load_store & 2)
12545 mm |= LoadStore | StoreStore;
12548 if (before_after & 2)
12550 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12551 || is_mm_seq_cst (model))
12553 if (load_store & 1)
12554 mm |= LoadLoad | LoadStore;
12555 if (load_store & 2)
12556 mm |= StoreLoad | StoreStore;
12560 /* Remove the bits implied by the system memory model. */
12561 mm &= ~implied;
12563 /* For raw barriers (before+after), always emit a barrier.
12564 This will become a compile-time barrier if needed. */
12565 if (mm || before_after == 3)
12566 emit_insn (gen_membar (GEN_INT (mm)));
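/* Worked example of the computation above: for a seq_cst full barrier
   (load_store == 3, before_after == 3) under the TSO memory model,
   mm starts as LoadLoad | StoreLoad | LoadStore | StoreStore and the
   TSO-implied LoadLoad | LoadStore | StoreStore bits are removed, so
   the single instruction

     membar  #StoreLoad

   is emitted -- the one ordering TSO does not already guarantee. */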
12569 /* Expand code to perform an 8- or 16-bit compare and swap by doing a 32-bit
12570 compare and swap on the word containing the byte or half-word. */
12572 static void
12573 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12574 rtx oldval, rtx newval)
12576 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12577 rtx addr = gen_reg_rtx (Pmode);
12578 rtx off = gen_reg_rtx (SImode);
12579 rtx oldv = gen_reg_rtx (SImode);
12580 rtx newv = gen_reg_rtx (SImode);
12581 rtx oldvalue = gen_reg_rtx (SImode);
12582 rtx newvalue = gen_reg_rtx (SImode);
12583 rtx res = gen_reg_rtx (SImode);
12584 rtx resv = gen_reg_rtx (SImode);
12585 rtx memsi, val, mask, cc;
12587 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12589 if (Pmode != SImode)
12590 addr1 = gen_lowpart (SImode, addr1);
12591 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12593 memsi = gen_rtx_MEM (SImode, addr);
12594 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12595 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12597 val = copy_to_reg (memsi);
12599 emit_insn (gen_rtx_SET (off,
12600 gen_rtx_XOR (SImode, off,
12601 GEN_INT (GET_MODE (mem) == QImode
12602 ? 3 : 2))));
12604 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12606 if (GET_MODE (mem) == QImode)
12607 mask = force_reg (SImode, GEN_INT (0xff));
12608 else
12609 mask = force_reg (SImode, GEN_INT (0xffff));
12611 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12613 emit_insn (gen_rtx_SET (val,
12614 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12615 val)));
12617 oldval = gen_lowpart (SImode, oldval);
12618 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12620 newval = gen_lowpart_common (SImode, newval);
12621 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12623 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12625 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12627 rtx_code_label *end_label = gen_label_rtx ();
12628 rtx_code_label *loop_label = gen_label_rtx ();
12629 emit_label (loop_label);
12631 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12633 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12635 emit_move_insn (bool_result, const1_rtx);
12637 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12639 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12641 emit_insn (gen_rtx_SET (resv,
12642 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12643 res)));
12645 emit_move_insn (bool_result, const0_rtx);
12647 cc = gen_compare_reg_1 (NE, resv, val);
12648 emit_insn (gen_rtx_SET (val, resv));
12650 /* Use cbranchcc4 to separate the compare and branch! */
12651 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12652 cc, const0_rtx, loop_label));
12654 emit_label (end_label);
12656 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12658 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12660 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
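/* A rough C rendition of the word-based emulation above, assuming
   <stdint.h>, a 32-bit __sync_val_compare_and_swap and big-endian
   byte numbering (illustrative sketch only, not the RTL actually
   emitted):

     uint8_t
     cas_byte (uint8_t *p, uint8_t oldv, uint8_t newv)
     {
       uint32_t *wp = (uint32_t *) ((uintptr_t) p & -4);
       int shift = (3 - ((uintptr_t) p & 3)) * 8;
       uint32_t mask = (uint32_t) 0xff << shift;
       uint32_t w, expected, desired, res;
       do
         {
           w = *wp & ~mask;
           expected = w | ((uint32_t) oldv << shift);
           desired = w | ((uint32_t) newv << shift);
           res = __sync_val_compare_and_swap (wp, expected, desired);
         }
       while (res != expected && (res & ~mask) != w);
       return res >> shift;
     }

   The loop retries only when the bytes outside the accessed one have
   changed, exactly as the resv/val comparison does above. */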
12663 /* Expand code to perform a compare-and-swap. */
12665 void
12666 sparc_expand_compare_and_swap (rtx operands[])
12668 rtx bval, retval, mem, oldval, newval;
12669 machine_mode mode;
12670 enum memmodel model;
12672 bval = operands[0];
12673 retval = operands[1];
12674 mem = operands[2];
12675 oldval = operands[3];
12676 newval = operands[4];
12677 model = (enum memmodel) INTVAL (operands[6]);
12678 mode = GET_MODE (mem);
12680 sparc_emit_membar_for_model (model, 3, 1);
12682 if (reg_overlap_mentioned_p (retval, oldval))
12683 oldval = copy_to_reg (oldval);
12685 if (mode == QImode || mode == HImode)
12686 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12687 else
12689 rtx (*gen) (rtx, rtx, rtx, rtx);
12690 rtx x;
12692 if (mode == SImode)
12693 gen = gen_atomic_compare_and_swapsi_1;
12694 else
12695 gen = gen_atomic_compare_and_swapdi_1;
12696 emit_insn (gen (retval, mem, oldval, newval));
12698 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12699 if (x != bval)
12700 convert_move (bval, x, 1);
12703 sparc_emit_membar_for_model (model, 3, 2);
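/* This expander ultimately backs source-level sequences such as

     _Bool ok = __atomic_compare_exchange_n (&x, &expected, desired,
                                             false, __ATOMIC_SEQ_CST,
                                             __ATOMIC_SEQ_CST);

   (usage sketch; variable names are illustrative), with the two
   sparc_emit_membar_for_model calls supplying the fences the given
   memory model requires before and after the operation. */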
12706 void
12707 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12709 rtx t_1, t_2, t_3;
12711 sel = gen_lowpart (DImode, sel);
12712 switch (vmode)
12714 case E_V2SImode:
12715 /* inp = xxxxxxxAxxxxxxxB */
12716 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12717 NULL_RTX, 1, OPTAB_DIRECT);
12718 /* t_1 = ....xxxxxxxAxxx. */
12719 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12720 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12721 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12722 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12723 /* sel = .......B */
12724 /* t_1 = ...A.... */
12725 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12726 /* sel = ...A...B */
12727 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12728 /* sel = AAAABBBB * 4 */
12729 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12730 /* sel = { A*4, A*4+1, A*4+2, ... } */
12731 break;
12733 case E_V4HImode:
12734 /* inp = xxxAxxxBxxxCxxxD */
12735 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12736 NULL_RTX, 1, OPTAB_DIRECT);
12737 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12738 NULL_RTX, 1, OPTAB_DIRECT);
12739 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12740 NULL_RTX, 1, OPTAB_DIRECT);
12741 /* t_1 = ..xxxAxxxBxxxCxx */
12742 /* t_2 = ....xxxAxxxBxxxC */
12743 /* t_3 = ......xxxAxxxBxx */
12744 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12745 GEN_INT (0x07),
12746 NULL_RTX, 1, OPTAB_DIRECT);
12747 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12748 GEN_INT (0x0700),
12749 NULL_RTX, 1, OPTAB_DIRECT);
12750 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12751 GEN_INT (0x070000),
12752 NULL_RTX, 1, OPTAB_DIRECT);
12753 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12754 GEN_INT (0x07000000),
12755 NULL_RTX, 1, OPTAB_DIRECT);
12756 /* sel = .......D */
12757 /* t_1 = .....C.. */
12758 /* t_2 = ...B.... */
12759 /* t_3 = .A...... */
12760 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12761 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12762 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12763 /* sel = .A.B.C.D */
12764 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12765 /* sel = AABBCCDD * 2 */
12766 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12767 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12768 break;
12770 case E_V8QImode:
12771 /* input = xAxBxCxDxExFxGxH */
12772 sel = expand_simple_binop (DImode, AND, sel,
12773 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12774 | 0x0f0f0f0f),
12775 NULL_RTX, 1, OPTAB_DIRECT);
12776 /* sel = .A.B.C.D.E.F.G.H */
12777 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12778 NULL_RTX, 1, OPTAB_DIRECT);
12779 /* t_1 = ..A.B.C.D.E.F.G. */
12780 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12781 NULL_RTX, 1, OPTAB_DIRECT);
12782 /* sel = .AABBCCDDEEFFGGH */
12783 sel = expand_simple_binop (DImode, AND, sel,
12784 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12785 | 0xff00ff),
12786 NULL_RTX, 1, OPTAB_DIRECT);
12787 /* sel = ..AB..CD..EF..GH */
12788 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12789 NULL_RTX, 1, OPTAB_DIRECT);
12790 /* t_1 = ....AB..CD..EF.. */
12791 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12792 NULL_RTX, 1, OPTAB_DIRECT);
12793 /* sel = ..ABABCDCDEFEFGH */
12794 sel = expand_simple_binop (DImode, AND, sel,
12795 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12796 NULL_RTX, 1, OPTAB_DIRECT);
12797 /* sel = ....ABCD....EFGH */
12798 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12799 NULL_RTX, 1, OPTAB_DIRECT);
12800 /* t_1 = ........ABCD.... */
12801 sel = gen_lowpart (SImode, sel);
12802 t_1 = gen_lowpart (SImode, t_1);
12803 break;
12805 default:
12806 gcc_unreachable ();
12809 /* Always perform the final addition/merge within the bmask insn. */
12810 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
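/* Worked V4HImode example of the arithmetic above, ignoring element
   ordering: a selector packed as bytes 0x03020100 is multiplied by
   0x22 to give 0x66442200, and the bmask addition of 0x01010101
   yields 0x67452301 -- byte-pair selectors { 6,7, 4,5, 2,3, 0,1 },
   i.e. each halfword index doubled into its pair of byte indices. */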
12813 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12815 static bool
12816 sparc_frame_pointer_required (void)
12818 /* If the stack pointer is dynamically modified in the function, it cannot
12819 serve as the frame pointer. */
12820 if (cfun->calls_alloca)
12821 return true;
12823 /* If the function receives nonlocal gotos, it needs to save the frame
12824 pointer in the nonlocal_goto_save_area object. */
12825 if (cfun->has_nonlocal_label)
12826 return true;
12828 /* In flat mode, that's it. */
12829 if (TARGET_FLAT)
12830 return false;
12832 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12833 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12834 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12837 /* The way this is structured, we can't eliminate SFP in favor of SP
12838 if the frame pointer is required: we want to use the SFP->HFP elimination
12839 in that case. But the test in update_eliminables doesn't know we are
12840 assuming below that we only do the former elimination. */
12842 static bool
12843 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12845 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12848 /* Return the hard frame pointer directly to bypass the stack bias. */
12850 static rtx
12851 sparc_builtin_setjmp_frame_value (void)
12853 return hard_frame_pointer_rtx;
12856 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12857 they won't be allocated. */
12859 static void
12860 sparc_conditional_register_usage (void)
12862 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12864 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12865 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12867 /* If the user has passed -f{fixed,call-{used,saved}}-g5, then
12868 honor it. */
12869 if (TARGET_ARCH32 && fixed_regs[5])
12870 fixed_regs[5] = 1;
12871 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12872 fixed_regs[5] = 0;
12873 if (! TARGET_V9)
12875 int regno;
12876 for (regno = SPARC_FIRST_V9_FP_REG;
12877 regno <= SPARC_LAST_V9_FP_REG;
12878 regno++)
12879 fixed_regs[regno] = 1;
12880 /* %fcc0 is used by v8 and v9. */
12881 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12882 regno <= SPARC_LAST_V9_FCC_REG;
12883 regno++)
12884 fixed_regs[regno] = 1;
12886 if (! TARGET_FPU)
12888 int regno;
12889 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12890 fixed_regs[regno] = 1;
12892 /* If the user has passed -f{fixed,call-{used,saved}}-g2, then
12893 honor it. Likewise with g3 and g4. */
12894 if (fixed_regs[2] == 2)
12895 fixed_regs[2] = ! TARGET_APP_REGS;
12896 if (fixed_regs[3] == 2)
12897 fixed_regs[3] = ! TARGET_APP_REGS;
12898 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12899 fixed_regs[4] = ! TARGET_APP_REGS;
12900 else if (TARGET_CM_EMBMEDANY)
12901 fixed_regs[4] = 1;
12902 else if (fixed_regs[4] == 2)
12903 fixed_regs[4] = 0;
12904 if (TARGET_FLAT)
12906 int regno;
12907 /* Disable leaf functions. */
12908 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12909 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12910 leaf_reg_remap [regno] = regno;
12912 if (TARGET_VIS)
12913 global_regs[SPARC_GSR_REG] = 1;
12916 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12918 - We can't load constants into FP registers.
12919 - We can't load FP constants into integer registers when soft-float,
12920 because there is no soft-float pattern with a r/F constraint.
12921 - We can't load FP constants into integer registers for TFmode unless
12922 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12923 - Try and reload integer constants (symbolic or otherwise) back into
12924 registers directly, rather than having them dumped to memory. */
12926 static reg_class_t
12927 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12929 machine_mode mode = GET_MODE (x);
12930 if (CONSTANT_P (x))
12932 if (FP_REG_CLASS_P (rclass)
12933 || rclass == GENERAL_OR_FP_REGS
12934 || rclass == GENERAL_OR_EXTRA_FP_REGS
12935 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12936 || (mode == TFmode && ! const_zero_operand (x, mode)))
12937 return NO_REGS;
12939 if (GET_MODE_CLASS (mode) == MODE_INT)
12940 return GENERAL_REGS;
12942 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12944 if (! FP_REG_CLASS_P (rclass)
12945 || !(const_zero_operand (x, mode)
12946 || const_all_ones_operand (x, mode)))
12947 return NO_REGS;
12951 if (TARGET_VIS3
12952 && ! TARGET_ARCH64
12953 && (rclass == EXTRA_FP_REGS
12954 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12956 int regno = true_regnum (x);
12958 if (SPARC_INT_REG_P (regno))
12959 return (rclass == EXTRA_FP_REGS
12960 ? FP_REGS : GENERAL_OR_FP_REGS);
12963 return rclass;
12966 /* Return true if we use LRA instead of the reload pass. */
12968 static bool
12969 sparc_lra_p (void)
12971 return TARGET_LRA;
12974 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12975 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12977 const char *
12978 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12980 char mulstr[32];
12982 gcc_assert (! TARGET_ARCH64);
12984 if (sparc_check_64 (operands[1], insn) <= 0)
12985 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12986 if (which_alternative == 1)
12987 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12988 if (GET_CODE (operands[2]) == CONST_INT)
12990 if (which_alternative == 1)
12992 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12993 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12994 output_asm_insn (mulstr, operands);
12995 return "srlx\t%L0, 32, %H0";
12997 else
12999 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13000 output_asm_insn ("or\t%L1, %3, %3", operands);
13001 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13002 output_asm_insn (mulstr, operands);
13003 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13004 return "mov\t%3, %L0";
13007 else if (rtx_equal_p (operands[1], operands[2]))
13009 if (which_alternative == 1)
13011 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13012 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13013 output_asm_insn (mulstr, operands);
13014 return "srlx\t%L0, 32, %H0";
13016 else
13018 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13019 output_asm_insn ("or\t%L1, %3, %3", operands);
13020 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13021 output_asm_insn (mulstr, operands);
13022 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13023 return "mov\t%3, %L0";
13026 if (sparc_check_64 (operands[2], insn) <= 0)
13027 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13028 if (which_alternative == 1)
13030 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13031 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13032 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13033 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13034 output_asm_insn (mulstr, operands);
13035 return "srlx\t%L0, 32, %H0";
13037 else
13039 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13040 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13041 output_asm_insn ("or\t%L1, %3, %3", operands);
13042 output_asm_insn ("or\t%L2, %4, %4", operands);
13043 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13044 output_asm_insn (mulstr, operands);
13045 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13046 return "mov\t%3, %L0";
13050 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13051 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
13052 and INNER_MODE are the modes describing TARGET. */
13054 static void
13055 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13056 machine_mode inner_mode)
13058 rtx t1, final_insn, sel;
13059 int bmask;
13061 t1 = gen_reg_rtx (mode);
13063 elt = convert_modes (SImode, inner_mode, elt, true);
13064 emit_move_insn (gen_lowpart(SImode, t1), elt);
13066 switch (mode)
13068 case E_V2SImode:
13069 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13070 bmask = 0x45674567;
13071 break;
13072 case E_V4HImode:
13073 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13074 bmask = 0x67676767;
13075 break;
13076 case E_V8QImode:
13077 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13078 bmask = 0x77777777;
13079 break;
13080 default:
13081 gcc_unreachable ();
13084 sel = force_reg (SImode, GEN_INT (bmask));
13085 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13086 emit_insn (final_insn);
13089 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13090 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
13092 static void
13093 vector_init_fpmerge (rtx target, rtx elt)
13095 rtx t1, t2, t2_low, t3, t3_low;
13097 t1 = gen_reg_rtx (V4QImode);
13098 elt = convert_modes (SImode, QImode, elt, true);
13099 emit_move_insn (gen_lowpart (SImode, t1), elt);
13101 t2 = gen_reg_rtx (V8QImode);
13102 t2_low = gen_lowpart (V4QImode, t2);
13103 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13105 t3 = gen_reg_rtx (V8QImode);
13106 t3_low = gen_lowpart (V4QImode, t3);
13107 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13109 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
13112 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13113 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
13115 static void
13116 vector_init_faligndata (rtx target, rtx elt)
13118 rtx t1 = gen_reg_rtx (V4HImode);
13119 int i;
13121 elt = convert_modes (SImode, HImode, elt, true);
13122 emit_move_insn (gen_lowpart (SImode, t1), elt);
13124 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
13125 force_reg (SImode, GEN_INT (6)),
13126 const0_rtx));
13128 for (i = 0; i < 4; i++)
13129 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
13132 /* Emit code to initialize TARGET to values for individual fields VALS. */
13134 void
13135 sparc_expand_vector_init (rtx target, rtx vals)
13137 const machine_mode mode = GET_MODE (target);
13138 const machine_mode inner_mode = GET_MODE_INNER (mode);
13139 const int n_elts = GET_MODE_NUNITS (mode);
13140 int i, n_var = 0;
13141 bool all_same = true;
13142 rtx mem;
13144 for (i = 0; i < n_elts; i++)
13146 rtx x = XVECEXP (vals, 0, i);
13147 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
13148 n_var++;
13150 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
13151 all_same = false;
13154 if (n_var == 0)
13156 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
13157 return;
13160 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
13162 if (GET_MODE_SIZE (inner_mode) == 4)
13164 emit_move_insn (gen_lowpart (SImode, target),
13165 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
13166 return;
13168 else if (GET_MODE_SIZE (inner_mode) == 8)
13170 emit_move_insn (gen_lowpart (DImode, target),
13171 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
13172 return;
13175 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
13176 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
13178 emit_move_insn (gen_highpart (word_mode, target),
13179 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
13180 emit_move_insn (gen_lowpart (word_mode, target),
13181 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
13182 return;
13185 if (all_same && GET_MODE_SIZE (mode) == 8)
13187 if (TARGET_VIS2)
13189 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
13190 return;
13192 if (mode == V8QImode)
13194 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
13195 return;
13197 if (mode == V4HImode)
13199 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
13200 return;
13204 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
13205 for (i = 0; i < n_elts; i++)
13206 emit_move_insn (adjust_address_nv (mem, inner_mode,
13207 i * GET_MODE_SIZE (inner_mode)),
13208 XVECEXP (vals, 0, i));
13209 emit_move_insn (target, mem);
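/* At the source level this expander handles vector constructors such
   as (illustrative sketch)

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi
     splat (short x)
     {
       return (v4hi) { x, x, x, x };
     }

   where the all_same path above selects bshuffle, fpmerge or
   faligndata when the relevant VIS level is available, and otherwise
   falls back to assembling the vector in a stack temporary. */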
13212 /* Implement TARGET_SECONDARY_RELOAD. */
13214 static reg_class_t
13215 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13216 machine_mode mode, secondary_reload_info *sri)
13218 enum reg_class rclass = (enum reg_class) rclass_i;
13220 sri->icode = CODE_FOR_nothing;
13221 sri->extra_cost = 0;
13223 /* We need a temporary when loading/storing a HImode/QImode value
13224 between memory and the FPU registers. This can happen when combine puts
13225 a paradoxical subreg in a float/fix conversion insn. */
13226 if (FP_REG_CLASS_P (rclass)
13227 && (mode == HImode || mode == QImode)
13228 && (GET_CODE (x) == MEM
13229 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13230 && true_regnum (x) == -1)))
13231 return GENERAL_REGS;
13233 /* On 32-bit we need a temporary when loading/storing a DFmode value
13234 between unaligned memory and the upper FPU registers. */
13235 if (TARGET_ARCH32
13236 && rclass == EXTRA_FP_REGS
13237 && mode == DFmode
13238 && GET_CODE (x) == MEM
13239 && ! mem_min_alignment (x, 8))
13240 return FP_REGS;
13242 if (((TARGET_CM_MEDANY
13243 && symbolic_operand (x, mode))
13244 || (TARGET_CM_EMBMEDANY
13245 && text_segment_operand (x, mode)))
13246 && ! flag_pic)
13248 if (in_p)
13249 sri->icode = direct_optab_handler (reload_in_optab, mode);
13250 else
13251 sri->icode = direct_optab_handler (reload_out_optab, mode);
13252 return NO_REGS;
13255 if (TARGET_VIS3 && TARGET_ARCH32)
13257 int regno = true_regnum (x);
13259 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13260 to move 8-byte values in 4-byte pieces. This only works via
13261 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13262 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13263 an FP_REGS intermediate move. */
13264 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13265 || ((general_or_i64_p (rclass)
13266 || rclass == GENERAL_OR_FP_REGS)
13267 && SPARC_FP_REG_P (regno)))
13269 sri->extra_cost = 2;
13270 return FP_REGS;
13274 return NO_REGS;
13277 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13279 On SPARC, when VIS3 is not available, it is not possible to move
13280 data directly between GENERAL_REGS and FP_REGS. */
13282 static bool
13283 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13284 reg_class_t class2)
13286 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13287 && (! TARGET_VIS3
13288 || GET_MODE_SIZE (mode) > 8
13289 || GET_MODE_SIZE (mode) < 4));
13292 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13294 get_secondary_mem widens its argument to BITS_PER_WORD, which loses on v9
13295 because the movsi and movsf patterns don't handle r/f moves.
13296 For v8 we copy the default definition. */
13298 static machine_mode
13299 sparc_secondary_memory_needed_mode (machine_mode mode)
13301 if (TARGET_ARCH64)
13303 if (GET_MODE_BITSIZE (mode) < 32)
13304 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13305 return mode;
13307 else
13309 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13310 return mode_for_size (BITS_PER_WORD,
13311 GET_MODE_CLASS (mode), 0).require ();
13312 return mode;
/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

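/* A sketch of what this enables on 64-bit (output indicative only):

     long d = x != 0 ? a : b;

   is a DImode register test against zero accepted by v9_regcmp_p, so
   the expander uses the register itself as the "CC" value and the
   select can come out as

     mov     %b, %d
     movrnz  %x, %a, %d

   with no compare instruction at all, instead of a cmp followed by a
   movne on the condition codes.  */
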
/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}

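/* A rough picture of the emitted sequence for a V4HI vcond with VIS3
   (mnemonics indicative only; the exact ones depend on FCODE/CCODE):

     fcmpgt16  %f0, %f2, %o1    ! element-wise compare, mask in %o1
     cmask16   %o1              ! latch the mask into GSR.mask
     bshuffle  %f4, %f6, %f8    ! merge the two sources per GSR.mask  */
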
/* On SPARC, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
	size = 4;
    }

  return size;
}

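/* So on 64-bit, DImode in the integer file keeps the natural size of 8,
   while DFmode and the 8-byte vector modes, which live in the FP file,
   are handled in 4-byte chunks.  */
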
/* Implement TARGET_HARD_REGNO_NREGS.

   On SPARC, ordinary registers hold 32 bits worth; this means both
   integer and floating point registers.  On v9, integer regs hold 64
   bits worth; floating point regs hold 32 bits worth (this includes the
   new fp regs as even the odd ones are included in the hard register
   count).  */

static unsigned int
sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (regno == SPARC_GSR_REG)
    return 1;
  if (TARGET_ARCH64)
    {
      if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
      return CEIL (GET_MODE_SIZE (mode), 4);
    }
  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
}

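/* For instance, on v9 a DFmode value occupies a single integer register
   but two FP registers, and TFmode takes two integer registers versus
   four FP registers.  */
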
/* Implement TARGET_HARD_REGNO_MODE_OK.

   ??? Because of the funny way we pass parameters we should allow certain
   ??? types of float/complex values to be in integer registers during
   ??? RTL generation.  This only matters on arch32.  */

static bool
sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
}

/* Implement TARGET_MODES_TIEABLE_P.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

static bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32 bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}

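/* Example: on V9, SFmode (4 bytes) and DFmode (8 bytes) are both
   MODE_FLOAT but are not tieable, since an SFmode value can only live
   in %f0-%f31 while a DFmode value may sit in the upper half of the
   FP register file.  */
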
/* Implement TARGET_CSTORE_MODE.  */

static scalar_int_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
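  /* As a reminder of the FSR layout (per the SPARC architecture
     manual): cexc occupies bits 4:0, aexc bits 9:5 and TEM, the trap
     enable mask, bits 27:23.  The two masks above therefore cover the
     accrued exception and trap enable fields.  */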
  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}

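/* Taken together, the three sequences mirror the feholdexcept /
   feclearexcept / feupdateenv triple, roughly (a sketch):

     hold:    fenv_var = %fsr;  tmp1 = fenv_var & ~(aexc | TEM);
	      %fsr = tmp1;
     clear:   %fsr = tmp1;
     update:  tmp2 = %fsr;  %fsr = fenv_var;
	      __atomic_feraiseexcept ((int) tmp2);

   with tmp2 shifted down by 5 beforehand when the FE_* values match
   the cexc rather than the aexc bit positions.  */
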
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Borrowed from the PA port.

   SImode loads to floating-point registers are not zero-extended.
   The definition for LOAD_EXTEND_OP specifies that integer loads
   narrower than BITS_PER_WORD will be zero-extended.  As a result,
   we inhibit changes from SImode unless they are to a mode that is
   identical in size.

   Likewise for SFmode, since word-mode paradoxical subregs are
   problematic on big-endian architectures.  */

static bool
sparc_can_change_mode_class (machine_mode from, machine_mode to,
			     reg_class_t rclass)
{
  if (TARGET_ARCH64
      && GET_MODE_SIZE (from) == 4
      && GET_MODE_SIZE (to) != 4)
    return !reg_classes_intersect_p (rclass, FP_REGS);
  return true;
}

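/* Illustrative case: on 64-bit, a (subreg:DI (reg:SI ...)) of a value
   living in an FP register would be assumed zero-extended per
   LOAD_EXTEND_OP, but an SImode FP load leaves the upper half
   undefined, so SImode-to-DImode changes are rejected for any class
   that overlaps FP_REGS.  */
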
/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST)
    return MAX (align, FASTEST_ALIGNMENT);
  return align;
}

#include "gt-sparc.h"