Use vec<> in build_vector
[official-gcc.git] / gcc / config / sparc / sparc.c
blob53689a1ccfa27fb39d4d80997b3640b8c3cd0673
1 /* Subroutines for insn-output.c for SPARC.
2 Copyright (C) 1987-2017 Free Software Foundation, Inc.
3 Contributed by Michael Tiemann (tiemann@cygnus.com)
4 64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
5 at Cygnus Support.
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 3, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "gimple.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "expmed.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "debug.h"
52 #include "common/common-target.h"
53 #include "gimplify.h"
54 #include "langhooks.h"
55 #include "reload.h"
56 #include "params.h"
57 #include "tree-pass.h"
58 #include "context.h"
59 #include "builtins.h"
61 /* This file should be included last. */
62 #include "target-def.h"
64 /* Processor costs */
66 struct processor_costs {
67 /* Integer load */
68 const int int_load;
70 /* Integer signed load */
71 const int int_sload;
73 /* Integer zeroed load */
74 const int int_zload;
76 /* Float load */
77 const int float_load;
79 /* fmov, fneg, fabs */
80 const int float_move;
82 /* fadd, fsub */
83 const int float_plusminus;
85 /* fcmp */
86 const int float_cmp;
88 /* fmov, fmovr */
89 const int float_cmove;
91 /* fmul */
92 const int float_mul;
94 /* fdivs */
95 const int float_div_sf;
97 /* fdivd */
98 const int float_div_df;
100 /* fsqrts */
101 const int float_sqrt_sf;
103 /* fsqrtd */
104 const int float_sqrt_df;
106 /* umul/smul */
107 const int int_mul;
109 /* mulX */
110 const int int_mulX;
112 /* integer multiply cost for each bit set past the most
113 significant 3, so the formula for multiply cost becomes:
115 if (rs1 < 0)
116 highest_bit = highest_clear_bit(rs1);
117 else
118 highest_bit = highest_set_bit(rs1);
119 if (highest_bit < 3)
120 highest_bit = 3;
121 cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);
123 A value of zero indicates that the multiply costs is fixed,
124 and not variable. */
125 const int int_mul_bit_factor;
127 /* udiv/sdiv */
128 const int int_div;
130 /* divX */
131 const int int_divX;
133 /* movcc, movr */
134 const int int_cmove;
136 /* penalty for shifts, due to scheduling rules etc. */
137 const int shift_penalty;
140 static const
141 struct processor_costs cypress_costs = {
142 COSTS_N_INSNS (2), /* int load */
143 COSTS_N_INSNS (2), /* int signed load */
144 COSTS_N_INSNS (2), /* int zeroed load */
145 COSTS_N_INSNS (2), /* float load */
146 COSTS_N_INSNS (5), /* fmov, fneg, fabs */
147 COSTS_N_INSNS (5), /* fadd, fsub */
148 COSTS_N_INSNS (1), /* fcmp */
149 COSTS_N_INSNS (1), /* fmov, fmovr */
150 COSTS_N_INSNS (7), /* fmul */
151 COSTS_N_INSNS (37), /* fdivs */
152 COSTS_N_INSNS (37), /* fdivd */
153 COSTS_N_INSNS (63), /* fsqrts */
154 COSTS_N_INSNS (63), /* fsqrtd */
155 COSTS_N_INSNS (1), /* imul */
156 COSTS_N_INSNS (1), /* imulX */
157 0, /* imul bit factor */
158 COSTS_N_INSNS (1), /* idiv */
159 COSTS_N_INSNS (1), /* idivX */
160 COSTS_N_INSNS (1), /* movcc/movr */
161 0, /* shift penalty */
164 static const
165 struct processor_costs supersparc_costs = {
166 COSTS_N_INSNS (1), /* int load */
167 COSTS_N_INSNS (1), /* int signed load */
168 COSTS_N_INSNS (1), /* int zeroed load */
169 COSTS_N_INSNS (0), /* float load */
170 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
171 COSTS_N_INSNS (3), /* fadd, fsub */
172 COSTS_N_INSNS (3), /* fcmp */
173 COSTS_N_INSNS (1), /* fmov, fmovr */
174 COSTS_N_INSNS (3), /* fmul */
175 COSTS_N_INSNS (6), /* fdivs */
176 COSTS_N_INSNS (9), /* fdivd */
177 COSTS_N_INSNS (12), /* fsqrts */
178 COSTS_N_INSNS (12), /* fsqrtd */
179 COSTS_N_INSNS (4), /* imul */
180 COSTS_N_INSNS (4), /* imulX */
181 0, /* imul bit factor */
182 COSTS_N_INSNS (4), /* idiv */
183 COSTS_N_INSNS (4), /* idivX */
184 COSTS_N_INSNS (1), /* movcc/movr */
185 1, /* shift penalty */
188 static const
189 struct processor_costs hypersparc_costs = {
190 COSTS_N_INSNS (1), /* int load */
191 COSTS_N_INSNS (1), /* int signed load */
192 COSTS_N_INSNS (1), /* int zeroed load */
193 COSTS_N_INSNS (1), /* float load */
194 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
195 COSTS_N_INSNS (1), /* fadd, fsub */
196 COSTS_N_INSNS (1), /* fcmp */
197 COSTS_N_INSNS (1), /* fmov, fmovr */
198 COSTS_N_INSNS (1), /* fmul */
199 COSTS_N_INSNS (8), /* fdivs */
200 COSTS_N_INSNS (12), /* fdivd */
201 COSTS_N_INSNS (17), /* fsqrts */
202 COSTS_N_INSNS (17), /* fsqrtd */
203 COSTS_N_INSNS (17), /* imul */
204 COSTS_N_INSNS (17), /* imulX */
205 0, /* imul bit factor */
206 COSTS_N_INSNS (17), /* idiv */
207 COSTS_N_INSNS (17), /* idivX */
208 COSTS_N_INSNS (1), /* movcc/movr */
209 0, /* shift penalty */
212 static const
213 struct processor_costs leon_costs = {
214 COSTS_N_INSNS (1), /* int load */
215 COSTS_N_INSNS (1), /* int signed load */
216 COSTS_N_INSNS (1), /* int zeroed load */
217 COSTS_N_INSNS (1), /* float load */
218 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
219 COSTS_N_INSNS (1), /* fadd, fsub */
220 COSTS_N_INSNS (1), /* fcmp */
221 COSTS_N_INSNS (1), /* fmov, fmovr */
222 COSTS_N_INSNS (1), /* fmul */
223 COSTS_N_INSNS (15), /* fdivs */
224 COSTS_N_INSNS (15), /* fdivd */
225 COSTS_N_INSNS (23), /* fsqrts */
226 COSTS_N_INSNS (23), /* fsqrtd */
227 COSTS_N_INSNS (5), /* imul */
228 COSTS_N_INSNS (5), /* imulX */
229 0, /* imul bit factor */
230 COSTS_N_INSNS (5), /* idiv */
231 COSTS_N_INSNS (5), /* idivX */
232 COSTS_N_INSNS (1), /* movcc/movr */
233 0, /* shift penalty */
236 static const
237 struct processor_costs leon3_costs = {
238 COSTS_N_INSNS (1), /* int load */
239 COSTS_N_INSNS (1), /* int signed load */
240 COSTS_N_INSNS (1), /* int zeroed load */
241 COSTS_N_INSNS (1), /* float load */
242 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
243 COSTS_N_INSNS (1), /* fadd, fsub */
244 COSTS_N_INSNS (1), /* fcmp */
245 COSTS_N_INSNS (1), /* fmov, fmovr */
246 COSTS_N_INSNS (1), /* fmul */
247 COSTS_N_INSNS (14), /* fdivs */
248 COSTS_N_INSNS (15), /* fdivd */
249 COSTS_N_INSNS (22), /* fsqrts */
250 COSTS_N_INSNS (23), /* fsqrtd */
251 COSTS_N_INSNS (5), /* imul */
252 COSTS_N_INSNS (5), /* imulX */
253 0, /* imul bit factor */
254 COSTS_N_INSNS (35), /* idiv */
255 COSTS_N_INSNS (35), /* idivX */
256 COSTS_N_INSNS (1), /* movcc/movr */
257 0, /* shift penalty */
260 static const
261 struct processor_costs sparclet_costs = {
262 COSTS_N_INSNS (3), /* int load */
263 COSTS_N_INSNS (3), /* int signed load */
264 COSTS_N_INSNS (1), /* int zeroed load */
265 COSTS_N_INSNS (1), /* float load */
266 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
267 COSTS_N_INSNS (1), /* fadd, fsub */
268 COSTS_N_INSNS (1), /* fcmp */
269 COSTS_N_INSNS (1), /* fmov, fmovr */
270 COSTS_N_INSNS (1), /* fmul */
271 COSTS_N_INSNS (1), /* fdivs */
272 COSTS_N_INSNS (1), /* fdivd */
273 COSTS_N_INSNS (1), /* fsqrts */
274 COSTS_N_INSNS (1), /* fsqrtd */
275 COSTS_N_INSNS (5), /* imul */
276 COSTS_N_INSNS (5), /* imulX */
277 0, /* imul bit factor */
278 COSTS_N_INSNS (5), /* idiv */
279 COSTS_N_INSNS (5), /* idivX */
280 COSTS_N_INSNS (1), /* movcc/movr */
281 0, /* shift penalty */
284 static const
285 struct processor_costs ultrasparc_costs = {
286 COSTS_N_INSNS (2), /* int load */
287 COSTS_N_INSNS (3), /* int signed load */
288 COSTS_N_INSNS (2), /* int zeroed load */
289 COSTS_N_INSNS (2), /* float load */
290 COSTS_N_INSNS (1), /* fmov, fneg, fabs */
291 COSTS_N_INSNS (4), /* fadd, fsub */
292 COSTS_N_INSNS (1), /* fcmp */
293 COSTS_N_INSNS (2), /* fmov, fmovr */
294 COSTS_N_INSNS (4), /* fmul */
295 COSTS_N_INSNS (13), /* fdivs */
296 COSTS_N_INSNS (23), /* fdivd */
297 COSTS_N_INSNS (13), /* fsqrts */
298 COSTS_N_INSNS (23), /* fsqrtd */
299 COSTS_N_INSNS (4), /* imul */
300 COSTS_N_INSNS (4), /* imulX */
301 2, /* imul bit factor */
302 COSTS_N_INSNS (37), /* idiv */
303 COSTS_N_INSNS (68), /* idivX */
304 COSTS_N_INSNS (2), /* movcc/movr */
305 2, /* shift penalty */
308 static const
309 struct processor_costs ultrasparc3_costs = {
310 COSTS_N_INSNS (2), /* int load */
311 COSTS_N_INSNS (3), /* int signed load */
312 COSTS_N_INSNS (3), /* int zeroed load */
313 COSTS_N_INSNS (2), /* float load */
314 COSTS_N_INSNS (3), /* fmov, fneg, fabs */
315 COSTS_N_INSNS (4), /* fadd, fsub */
316 COSTS_N_INSNS (5), /* fcmp */
317 COSTS_N_INSNS (3), /* fmov, fmovr */
318 COSTS_N_INSNS (4), /* fmul */
319 COSTS_N_INSNS (17), /* fdivs */
320 COSTS_N_INSNS (20), /* fdivd */
321 COSTS_N_INSNS (20), /* fsqrts */
322 COSTS_N_INSNS (29), /* fsqrtd */
323 COSTS_N_INSNS (6), /* imul */
324 COSTS_N_INSNS (6), /* imulX */
325 0, /* imul bit factor */
326 COSTS_N_INSNS (40), /* idiv */
327 COSTS_N_INSNS (71), /* idivX */
328 COSTS_N_INSNS (2), /* movcc/movr */
329 0, /* shift penalty */
332 static const
333 struct processor_costs niagara_costs = {
334 COSTS_N_INSNS (3), /* int load */
335 COSTS_N_INSNS (3), /* int signed load */
336 COSTS_N_INSNS (3), /* int zeroed load */
337 COSTS_N_INSNS (9), /* float load */
338 COSTS_N_INSNS (8), /* fmov, fneg, fabs */
339 COSTS_N_INSNS (8), /* fadd, fsub */
340 COSTS_N_INSNS (26), /* fcmp */
341 COSTS_N_INSNS (8), /* fmov, fmovr */
342 COSTS_N_INSNS (29), /* fmul */
343 COSTS_N_INSNS (54), /* fdivs */
344 COSTS_N_INSNS (83), /* fdivd */
345 COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
346 COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
347 COSTS_N_INSNS (11), /* imul */
348 COSTS_N_INSNS (11), /* imulX */
349 0, /* imul bit factor */
350 COSTS_N_INSNS (72), /* idiv */
351 COSTS_N_INSNS (72), /* idivX */
352 COSTS_N_INSNS (1), /* movcc/movr */
353 0, /* shift penalty */
356 static const
357 struct processor_costs niagara2_costs = {
358 COSTS_N_INSNS (3), /* int load */
359 COSTS_N_INSNS (3), /* int signed load */
360 COSTS_N_INSNS (3), /* int zeroed load */
361 COSTS_N_INSNS (3), /* float load */
362 COSTS_N_INSNS (6), /* fmov, fneg, fabs */
363 COSTS_N_INSNS (6), /* fadd, fsub */
364 COSTS_N_INSNS (6), /* fcmp */
365 COSTS_N_INSNS (6), /* fmov, fmovr */
366 COSTS_N_INSNS (6), /* fmul */
367 COSTS_N_INSNS (19), /* fdivs */
368 COSTS_N_INSNS (33), /* fdivd */
369 COSTS_N_INSNS (19), /* fsqrts */
370 COSTS_N_INSNS (33), /* fsqrtd */
371 COSTS_N_INSNS (5), /* imul */
372 COSTS_N_INSNS (5), /* imulX */
373 0, /* imul bit factor */
374 COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
375 COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
376 COSTS_N_INSNS (1), /* movcc/movr */
377 0, /* shift penalty */
380 static const
381 struct processor_costs niagara3_costs = {
382 COSTS_N_INSNS (3), /* int load */
383 COSTS_N_INSNS (3), /* int signed load */
384 COSTS_N_INSNS (3), /* int zeroed load */
385 COSTS_N_INSNS (3), /* float load */
386 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
387 COSTS_N_INSNS (9), /* fadd, fsub */
388 COSTS_N_INSNS (9), /* fcmp */
389 COSTS_N_INSNS (9), /* fmov, fmovr */
390 COSTS_N_INSNS (9), /* fmul */
391 COSTS_N_INSNS (23), /* fdivs */
392 COSTS_N_INSNS (37), /* fdivd */
393 COSTS_N_INSNS (23), /* fsqrts */
394 COSTS_N_INSNS (37), /* fsqrtd */
395 COSTS_N_INSNS (9), /* imul */
396 COSTS_N_INSNS (9), /* imulX */
397 0, /* imul bit factor */
398 COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
399 COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
400 COSTS_N_INSNS (1), /* movcc/movr */
401 0, /* shift penalty */
404 static const
405 struct processor_costs niagara4_costs = {
406 COSTS_N_INSNS (5), /* int load */
407 COSTS_N_INSNS (5), /* int signed load */
408 COSTS_N_INSNS (5), /* int zeroed load */
409 COSTS_N_INSNS (5), /* float load */
410 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
411 COSTS_N_INSNS (11), /* fadd, fsub */
412 COSTS_N_INSNS (11), /* fcmp */
413 COSTS_N_INSNS (11), /* fmov, fmovr */
414 COSTS_N_INSNS (11), /* fmul */
415 COSTS_N_INSNS (24), /* fdivs */
416 COSTS_N_INSNS (37), /* fdivd */
417 COSTS_N_INSNS (24), /* fsqrts */
418 COSTS_N_INSNS (37), /* fsqrtd */
419 COSTS_N_INSNS (12), /* imul */
420 COSTS_N_INSNS (12), /* imulX */
421 0, /* imul bit factor */
422 COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
423 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
424 COSTS_N_INSNS (1), /* movcc/movr */
425 0, /* shift penalty */
428 static const
429 struct processor_costs niagara7_costs = {
430 COSTS_N_INSNS (5), /* int load */
431 COSTS_N_INSNS (5), /* int signed load */
432 COSTS_N_INSNS (5), /* int zeroed load */
433 COSTS_N_INSNS (5), /* float load */
434 COSTS_N_INSNS (11), /* fmov, fneg, fabs */
435 COSTS_N_INSNS (11), /* fadd, fsub */
436 COSTS_N_INSNS (11), /* fcmp */
437 COSTS_N_INSNS (11), /* fmov, fmovr */
438 COSTS_N_INSNS (11), /* fmul */
439 COSTS_N_INSNS (24), /* fdivs */
440 COSTS_N_INSNS (37), /* fdivd */
441 COSTS_N_INSNS (24), /* fsqrts */
442 COSTS_N_INSNS (37), /* fsqrtd */
443 COSTS_N_INSNS (12), /* imul */
444 COSTS_N_INSNS (12), /* imulX */
445 0, /* imul bit factor */
446 COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
447 COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
448 COSTS_N_INSNS (1), /* movcc/movr */
449 0, /* shift penalty */
452 static const
453 struct processor_costs m8_costs = {
454 COSTS_N_INSNS (3), /* int load */
455 COSTS_N_INSNS (3), /* int signed load */
456 COSTS_N_INSNS (3), /* int zeroed load */
457 COSTS_N_INSNS (3), /* float load */
458 COSTS_N_INSNS (9), /* fmov, fneg, fabs */
459 COSTS_N_INSNS (9), /* fadd, fsub */
460 COSTS_N_INSNS (9), /* fcmp */
461 COSTS_N_INSNS (9), /* fmov, fmovr */
462 COSTS_N_INSNS (9), /* fmul */
463 COSTS_N_INSNS (26), /* fdivs */
464 COSTS_N_INSNS (30), /* fdivd */
465 COSTS_N_INSNS (33), /* fsqrts */
466 COSTS_N_INSNS (41), /* fsqrtd */
467 COSTS_N_INSNS (12), /* imul */
468 COSTS_N_INSNS (10), /* imulX */
469 0, /* imul bit factor */
470 COSTS_N_INSNS (57), /* udiv/sdiv */
471 COSTS_N_INSNS (30), /* udivx/sdivx */
472 COSTS_N_INSNS (1), /* movcc/movr */
473 0, /* shift penalty */
476 static const struct processor_costs *sparc_costs = &cypress_costs;
478 #ifdef HAVE_AS_RELAX_OPTION
479 /* If 'as' and 'ld' are relaxing tail call insns into branch always, use
480 "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
481 With sethi/jmp, neither 'as' nor 'ld' has an easy way how to find out if
482 somebody does not branch between the sethi and jmp. */
483 #define LEAF_SIBCALL_SLOT_RESERVED_P 1
484 #else
485 #define LEAF_SIBCALL_SLOT_RESERVED_P \
486 ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
487 #endif
489 /* Vector to say how input registers are mapped to output registers.
490 HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
491 eliminate it. You must use -fomit-frame-pointer to get that. */
492 char leaf_reg_remap[] =
493 { 0, 1, 2, 3, 4, 5, 6, 7,
494 -1, -1, -1, -1, -1, -1, 14, -1,
495 -1, -1, -1, -1, -1, -1, -1, -1,
496 8, 9, 10, 11, 12, 13, -1, 15,
498 32, 33, 34, 35, 36, 37, 38, 39,
499 40, 41, 42, 43, 44, 45, 46, 47,
500 48, 49, 50, 51, 52, 53, 54, 55,
501 56, 57, 58, 59, 60, 61, 62, 63,
502 64, 65, 66, 67, 68, 69, 70, 71,
503 72, 73, 74, 75, 76, 77, 78, 79,
504 80, 81, 82, 83, 84, 85, 86, 87,
505 88, 89, 90, 91, 92, 93, 94, 95,
506 96, 97, 98, 99, 100, 101, 102};
508 /* Vector, indexed by hard register number, which contains 1
509 for a register that is allowable in a candidate for leaf
510 function treatment. */
511 char sparc_leaf_regs[] =
512 { 1, 1, 1, 1, 1, 1, 1, 1,
513 0, 0, 0, 0, 0, 0, 1, 0,
514 0, 0, 0, 0, 0, 0, 0, 0,
515 1, 1, 1, 1, 1, 1, 0, 1,
516 1, 1, 1, 1, 1, 1, 1, 1,
517 1, 1, 1, 1, 1, 1, 1, 1,
518 1, 1, 1, 1, 1, 1, 1, 1,
519 1, 1, 1, 1, 1, 1, 1, 1,
520 1, 1, 1, 1, 1, 1, 1, 1,
521 1, 1, 1, 1, 1, 1, 1, 1,
522 1, 1, 1, 1, 1, 1, 1, 1,
523 1, 1, 1, 1, 1, 1, 1, 1,
524 1, 1, 1, 1, 1, 1, 1};
526 struct GTY(()) machine_function
528 /* Size of the frame of the function. */
529 HOST_WIDE_INT frame_size;
531 /* Size of the frame of the function minus the register window save area
532 and the outgoing argument area. */
533 HOST_WIDE_INT apparent_frame_size;
535 /* Register we pretend the frame pointer is allocated to. Normally, this
536 is %fp, but if we are in a leaf procedure, this is (%sp + offset). We
537 record "offset" separately as it may be too big for (reg + disp). */
538 rtx frame_base_reg;
539 HOST_WIDE_INT frame_base_offset;
541 /* Number of global or FP registers to be saved (as 4-byte quantities). */
542 int n_global_fp_regs;
544 /* True if the current function is leaf and uses only leaf regs,
545 so that the SPARC leaf function optimization can be applied.
546 Private version of crtl->uses_only_leaf_regs, see
547 sparc_expand_prologue for the rationale. */
548 int leaf_function_p;
550 /* True if the prologue saves local or in registers. */
551 bool save_local_in_regs_p;
553 /* True if the data calculated by sparc_expand_prologue are valid. */
554 bool prologue_data_valid_p;
557 #define sparc_frame_size cfun->machine->frame_size
558 #define sparc_apparent_frame_size cfun->machine->apparent_frame_size
559 #define sparc_frame_base_reg cfun->machine->frame_base_reg
560 #define sparc_frame_base_offset cfun->machine->frame_base_offset
561 #define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
562 #define sparc_leaf_function_p cfun->machine->leaf_function_p
563 #define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
564 #define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p
566 /* 1 if the next opcode is to be specially indented. */
567 int sparc_indent_opcode = 0;
569 static void sparc_option_override (void);
570 static void sparc_init_modes (void);
571 static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
572 const_tree, bool, bool, int *, int *);
574 static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
575 static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
577 static void sparc_emit_set_const32 (rtx, rtx);
578 static void sparc_emit_set_const64 (rtx, rtx);
579 static void sparc_output_addr_vec (rtx);
580 static void sparc_output_addr_diff_vec (rtx);
581 static void sparc_output_deferred_case_vectors (void);
582 static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
583 static bool sparc_legitimate_constant_p (machine_mode, rtx);
584 static rtx sparc_builtin_saveregs (void);
585 static int epilogue_renumber (rtx *, int);
586 static bool sparc_assemble_integer (rtx, unsigned int, int);
587 static int set_extends (rtx_insn *);
588 static void sparc_asm_function_prologue (FILE *);
589 static void sparc_asm_function_epilogue (FILE *);
590 #ifdef TARGET_SOLARIS
591 static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
592 tree) ATTRIBUTE_UNUSED;
593 #endif
594 static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
595 static int sparc_issue_rate (void);
596 static void sparc_sched_init (FILE *, int, int);
597 static int sparc_use_sched_lookahead (void);
599 static void emit_soft_tfmode_libcall (const char *, int, rtx *);
600 static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
601 static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
602 static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
603 static void emit_hard_tfmode_operation (enum rtx_code, rtx *);
605 static bool sparc_function_ok_for_sibcall (tree, tree);
606 static void sparc_init_libfuncs (void);
607 static void sparc_init_builtins (void);
608 static void sparc_fpu_init_builtins (void);
609 static void sparc_vis_init_builtins (void);
610 static tree sparc_builtin_decl (unsigned, bool);
611 static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
612 static tree sparc_fold_builtin (tree, int, tree *, bool);
613 static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
614 HOST_WIDE_INT, tree);
615 static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
616 HOST_WIDE_INT, const_tree);
617 static struct machine_function * sparc_init_machine_status (void);
618 static bool sparc_cannot_force_const_mem (machine_mode, rtx);
619 static rtx sparc_tls_get_addr (void);
620 static rtx sparc_tls_got (void);
621 static int sparc_register_move_cost (machine_mode,
622 reg_class_t, reg_class_t);
623 static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
624 static rtx sparc_function_value (const_tree, const_tree, bool);
625 static rtx sparc_libcall_value (machine_mode, const_rtx);
626 static bool sparc_function_value_regno_p (const unsigned int);
627 static rtx sparc_struct_value_rtx (tree, int);
628 static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
629 int *, const_tree, int);
630 static bool sparc_return_in_memory (const_tree, const_tree);
631 static bool sparc_strict_argument_naming (cumulative_args_t);
632 static void sparc_va_start (tree, rtx);
633 static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
634 static bool sparc_vector_mode_supported_p (machine_mode);
635 static bool sparc_tls_referenced_p (rtx);
636 static rtx sparc_legitimize_tls_address (rtx);
637 static rtx sparc_legitimize_pic_address (rtx, rtx);
638 static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
639 static rtx sparc_delegitimize_address (rtx);
640 static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
641 static bool sparc_pass_by_reference (cumulative_args_t,
642 machine_mode, const_tree, bool);
643 static void sparc_function_arg_advance (cumulative_args_t,
644 machine_mode, const_tree, bool);
645 static rtx sparc_function_arg_1 (cumulative_args_t,
646 machine_mode, const_tree, bool, bool);
647 static rtx sparc_function_arg (cumulative_args_t,
648 machine_mode, const_tree, bool);
649 static rtx sparc_function_incoming_arg (cumulative_args_t,
650 machine_mode, const_tree, bool);
651 static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
652 static unsigned int sparc_function_arg_boundary (machine_mode,
653 const_tree);
654 static int sparc_arg_partial_bytes (cumulative_args_t,
655 machine_mode, tree, bool);
656 static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
657 static void sparc_file_end (void);
658 static bool sparc_frame_pointer_required (void);
659 static bool sparc_can_eliminate (const int, const int);
660 static rtx sparc_builtin_setjmp_frame_value (void);
661 static void sparc_conditional_register_usage (void);
662 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
663 static const char *sparc_mangle_type (const_tree);
664 #endif
665 static void sparc_trampoline_init (rtx, tree, rtx);
666 static machine_mode sparc_preferred_simd_mode (scalar_mode);
667 static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
668 static bool sparc_lra_p (void);
669 static bool sparc_print_operand_punct_valid_p (unsigned char);
670 static void sparc_print_operand (FILE *, rtx, int);
671 static void sparc_print_operand_address (FILE *, machine_mode, rtx);
672 static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
673 machine_mode,
674 secondary_reload_info *);
675 static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
676 reg_class_t);
677 static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
678 static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
679 static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
680 static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
681 static unsigned int sparc_min_arithmetic_precision (void);
682 static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
683 static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
684 static bool sparc_modes_tieable_p (machine_mode, machine_mode);
687 #ifdef SUBTARGET_ATTRIBUTE_TABLE
688 /* Table of valid machine attributes. */
689 static const struct attribute_spec sparc_attribute_table[] =
691 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
692 do_diagnostic } */
693 SUBTARGET_ATTRIBUTE_TABLE,
694 { NULL, 0, 0, false, false, false, NULL, false }
696 #endif
698 /* Option handling. */
700 /* Parsed value. */
701 enum cmodel sparc_cmodel;
703 char sparc_hard_reg_printed[8];
705 /* Initialize the GCC target structure. */
707 /* The default is to use .half rather than .short for aligned HI objects. */
708 #undef TARGET_ASM_ALIGNED_HI_OP
709 #define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
711 #undef TARGET_ASM_UNALIGNED_HI_OP
712 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
713 #undef TARGET_ASM_UNALIGNED_SI_OP
714 #define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
715 #undef TARGET_ASM_UNALIGNED_DI_OP
716 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"
718 /* The target hook has to handle DI-mode values. */
719 #undef TARGET_ASM_INTEGER
720 #define TARGET_ASM_INTEGER sparc_assemble_integer
722 #undef TARGET_ASM_FUNCTION_PROLOGUE
723 #define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
724 #undef TARGET_ASM_FUNCTION_EPILOGUE
725 #define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue
727 #undef TARGET_SCHED_ADJUST_COST
728 #define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
729 #undef TARGET_SCHED_ISSUE_RATE
730 #define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
731 #undef TARGET_SCHED_INIT
732 #define TARGET_SCHED_INIT sparc_sched_init
733 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
734 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead
736 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
737 #define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall
739 #undef TARGET_INIT_LIBFUNCS
740 #define TARGET_INIT_LIBFUNCS sparc_init_libfuncs
742 #undef TARGET_LEGITIMIZE_ADDRESS
743 #define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
744 #undef TARGET_DELEGITIMIZE_ADDRESS
745 #define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
746 #undef TARGET_MODE_DEPENDENT_ADDRESS_P
747 #define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p
749 #undef TARGET_INIT_BUILTINS
750 #define TARGET_INIT_BUILTINS sparc_init_builtins
751 #undef TARGET_BUILTIN_DECL
752 #define TARGET_BUILTIN_DECL sparc_builtin_decl
753 #undef TARGET_EXPAND_BUILTIN
754 #define TARGET_EXPAND_BUILTIN sparc_expand_builtin
755 #undef TARGET_FOLD_BUILTIN
756 #define TARGET_FOLD_BUILTIN sparc_fold_builtin
758 #if TARGET_TLS
759 #undef TARGET_HAVE_TLS
760 #define TARGET_HAVE_TLS true
761 #endif
763 #undef TARGET_CANNOT_FORCE_CONST_MEM
764 #define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem
766 #undef TARGET_ASM_OUTPUT_MI_THUNK
767 #define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
768 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
769 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk
771 #undef TARGET_RTX_COSTS
772 #define TARGET_RTX_COSTS sparc_rtx_costs
773 #undef TARGET_ADDRESS_COST
774 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
775 #undef TARGET_REGISTER_MOVE_COST
776 #define TARGET_REGISTER_MOVE_COST sparc_register_move_cost
778 #undef TARGET_PROMOTE_FUNCTION_MODE
779 #define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
781 #undef TARGET_FUNCTION_VALUE
782 #define TARGET_FUNCTION_VALUE sparc_function_value
783 #undef TARGET_LIBCALL_VALUE
784 #define TARGET_LIBCALL_VALUE sparc_libcall_value
785 #undef TARGET_FUNCTION_VALUE_REGNO_P
786 #define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p
788 #undef TARGET_STRUCT_VALUE_RTX
789 #define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
790 #undef TARGET_RETURN_IN_MEMORY
791 #define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
792 #undef TARGET_MUST_PASS_IN_STACK
793 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
794 #undef TARGET_PASS_BY_REFERENCE
795 #define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
796 #undef TARGET_ARG_PARTIAL_BYTES
797 #define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
798 #undef TARGET_FUNCTION_ARG_ADVANCE
799 #define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
800 #undef TARGET_FUNCTION_ARG
801 #define TARGET_FUNCTION_ARG sparc_function_arg
802 #undef TARGET_FUNCTION_INCOMING_ARG
803 #define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
804 #undef TARGET_FUNCTION_ARG_PADDING
805 #define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
806 #undef TARGET_FUNCTION_ARG_BOUNDARY
807 #define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary
809 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
810 #define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs
811 #undef TARGET_STRICT_ARGUMENT_NAMING
812 #define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming
814 #undef TARGET_EXPAND_BUILTIN_VA_START
815 #define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
816 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
817 #define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg
819 #undef TARGET_VECTOR_MODE_SUPPORTED_P
820 #define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p
822 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
823 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode
825 #ifdef SUBTARGET_INSERT_ATTRIBUTES
826 #undef TARGET_INSERT_ATTRIBUTES
827 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
828 #endif
830 #ifdef SUBTARGET_ATTRIBUTE_TABLE
831 #undef TARGET_ATTRIBUTE_TABLE
832 #define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
833 #endif
835 #undef TARGET_OPTION_OVERRIDE
836 #define TARGET_OPTION_OVERRIDE sparc_option_override
838 #ifdef TARGET_THREAD_SSP_OFFSET
839 #undef TARGET_STACK_PROTECT_GUARD
840 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
841 #endif
843 #if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
844 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
845 #define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
846 #endif
848 #undef TARGET_ASM_FILE_END
849 #define TARGET_ASM_FILE_END sparc_file_end
851 #undef TARGET_FRAME_POINTER_REQUIRED
852 #define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required
854 #undef TARGET_BUILTIN_SETJMP_FRAME_VALUE
855 #define TARGET_BUILTIN_SETJMP_FRAME_VALUE sparc_builtin_setjmp_frame_value
857 #undef TARGET_CAN_ELIMINATE
858 #define TARGET_CAN_ELIMINATE sparc_can_eliminate
860 #undef TARGET_PREFERRED_RELOAD_CLASS
861 #define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class
863 #undef TARGET_SECONDARY_RELOAD
864 #define TARGET_SECONDARY_RELOAD sparc_secondary_reload
865 #undef TARGET_SECONDARY_MEMORY_NEEDED
866 #define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
867 #undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
868 #define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode
870 #undef TARGET_CONDITIONAL_REGISTER_USAGE
871 #define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage
873 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
874 #undef TARGET_MANGLE_TYPE
875 #define TARGET_MANGLE_TYPE sparc_mangle_type
876 #endif
878 #undef TARGET_LRA_P
879 #define TARGET_LRA_P sparc_lra_p
881 #undef TARGET_LEGITIMATE_ADDRESS_P
882 #define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
884 #undef TARGET_LEGITIMATE_CONSTANT_P
885 #define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p
887 #undef TARGET_TRAMPOLINE_INIT
888 #define TARGET_TRAMPOLINE_INIT sparc_trampoline_init
890 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
891 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
892 #undef TARGET_PRINT_OPERAND
893 #define TARGET_PRINT_OPERAND sparc_print_operand
894 #undef TARGET_PRINT_OPERAND_ADDRESS
895 #define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address
897 /* The value stored by LDSTUB. */
898 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
899 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
901 #undef TARGET_CSTORE_MODE
902 #define TARGET_CSTORE_MODE sparc_cstore_mode
904 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
905 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv
907 #undef TARGET_FIXED_CONDITION_CODE_REGS
908 #define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs
910 #undef TARGET_MIN_ARITHMETIC_PRECISION
911 #define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision
913 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
914 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
916 #undef TARGET_HARD_REGNO_NREGS
917 #define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
918 #undef TARGET_HARD_REGNO_MODE_OK
919 #define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok
921 #undef TARGET_MODES_TIEABLE_P
922 #define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p
924 struct gcc_target targetm = TARGET_INITIALIZER;
926 /* Return the memory reference contained in X if any, zero otherwise. */
928 static rtx
929 mem_ref (rtx x)
931 if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
932 x = XEXP (x, 0);
934 if (MEM_P (x))
935 return x;
937 return NULL_RTX;
940 /* We use a machine specific pass to enable workarounds for errata.
942 We need to have the (essentially) final form of the insn stream in order
943 to properly detect the various hazards. Therefore, this machine specific
944 pass runs as late as possible. */
946 /* True if INSN is a md pattern or asm statement. */
947 #define USEFUL_INSN_P(INSN) \
948 (NONDEBUG_INSN_P (INSN) \
949 && GET_CODE (PATTERN (INSN)) != USE \
950 && GET_CODE (PATTERN (INSN)) != CLOBBER)
952 static unsigned int
953 sparc_do_work_around_errata (void)
955 rtx_insn *insn, *next;
957 /* Force all instructions to be split into their final form. */
958 split_all_insns_noflow ();
960 /* Now look for specific patterns in the insn stream. */
961 for (insn = get_insns (); insn; insn = next)
963 bool insert_nop = false;
964 rtx set;
966 /* Look into the instruction in a delay slot. */
967 if (NONJUMP_INSN_P (insn))
968 if (rtx_sequence *seq = dyn_cast <rtx_sequence *> (PATTERN (insn)))
969 insn = seq->insn (1);
971 /* Look for either of these two sequences:
973 Sequence A:
974 1. store of word size or less (e.g. st / stb / sth / stf)
975 2. any single instruction that is not a load or store
976 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)
978 Sequence B:
979 1. store of double word size (e.g. std / stdf)
980 2. any store instruction (e.g. st / stb / sth / stf / std / stdf) */
981 if (sparc_fix_b2bst
982 && NONJUMP_INSN_P (insn)
983 && (set = single_set (insn)) != NULL_RTX
984 && MEM_P (SET_DEST (set)))
986 /* Sequence B begins with a double-word store. */
987 bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
988 rtx_insn *after;
989 int i;
991 next = next_active_insn (insn);
992 if (!next)
993 break;
995 for (after = next, i = 0; i < 2; i++)
997 /* Skip empty assembly statements. */
998 if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
999 || (USEFUL_INSN_P (after)
1000 && (asm_noperands (PATTERN (after))>=0)
1001 && !strcmp (decode_asm_operands (PATTERN (after),
1002 NULL, NULL, NULL,
1003 NULL, NULL), "")))
1004 after = next_active_insn (after);
1005 if (!after)
1006 break;
1008 /* If the insn is a branch, then it cannot be problematic. */
1009 if (!NONJUMP_INSN_P (after)
1010 || GET_CODE (PATTERN (after)) == SEQUENCE)
1011 break;
1013 /* Sequence B is only two instructions long. */
1014 if (seq_b)
1016 /* Add NOP if followed by a store. */
1017 if ((set = single_set (after)) != NULL_RTX
1018 && MEM_P (SET_DEST (set)))
1019 insert_nop = true;
1021 /* Otherwise it is ok. */
1022 break;
1025 /* If the second instruction is a load or a store,
1026 then the sequence cannot be problematic. */
1027 if (i == 0)
1029 if (((set = single_set (after)) != NULL_RTX)
1030 && (MEM_P (SET_DEST (set)) || MEM_P (SET_SRC (set))))
1031 break;
1033 after = next_active_insn (after);
1034 if (!after)
1035 break;
1038 /* Add NOP if third instruction is a store. */
1039 if (i == 1
1040 && ((set = single_set (after)) != NULL_RTX)
1041 && MEM_P (SET_DEST (set)))
1042 insert_nop = true;
1045 else
1046 /* Look for a single-word load into an odd-numbered FP register. */
1047 if (sparc_fix_at697f
1048 && NONJUMP_INSN_P (insn)
1049 && (set = single_set (insn)) != NULL_RTX
1050 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1051 && MEM_P (SET_SRC (set))
1052 && REG_P (SET_DEST (set))
1053 && REGNO (SET_DEST (set)) > 31
1054 && REGNO (SET_DEST (set)) % 2 != 0)
1056 /* The wrong dependency is on the enclosing double register. */
1057 const unsigned int x = REGNO (SET_DEST (set)) - 1;
1058 unsigned int src1, src2, dest;
1059 int code;
1061 next = next_active_insn (insn);
1062 if (!next)
1063 break;
1064 /* If the insn is a branch, then it cannot be problematic. */
1065 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1066 continue;
1068 extract_insn (next);
1069 code = INSN_CODE (next);
1071 switch (code)
1073 case CODE_FOR_adddf3:
1074 case CODE_FOR_subdf3:
1075 case CODE_FOR_muldf3:
1076 case CODE_FOR_divdf3:
1077 dest = REGNO (recog_data.operand[0]);
1078 src1 = REGNO (recog_data.operand[1]);
1079 src2 = REGNO (recog_data.operand[2]);
1080 if (src1 != src2)
1082 /* Case [1-4]:
1083 ld [address], %fx+1
1084 FPOPd %f{x,y}, %f{y,x}, %f{x,y} */
1085 if ((src1 == x || src2 == x)
1086 && (dest == src1 || dest == src2))
1087 insert_nop = true;
1089 else
1091 /* Case 5:
1092 ld [address], %fx+1
1093 FPOPd %fx, %fx, %fx */
1094 if (src1 == x
1095 && dest == src1
1096 && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
1097 insert_nop = true;
1099 break;
1101 case CODE_FOR_sqrtdf2:
1102 dest = REGNO (recog_data.operand[0]);
1103 src1 = REGNO (recog_data.operand[1]);
1104 /* Case 6:
1105 ld [address], %fx+1
1106 fsqrtd %fx, %fx */
1107 if (src1 == x && dest == src1)
1108 insert_nop = true;
1109 break;
1111 default:
1112 break;
1116 /* Look for a single-word load into an integer register. */
1117 else if (sparc_fix_ut699
1118 && NONJUMP_INSN_P (insn)
1119 && (set = single_set (insn)) != NULL_RTX
1120 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
1121 && mem_ref (SET_SRC (set)) != NULL_RTX
1122 && REG_P (SET_DEST (set))
1123 && REGNO (SET_DEST (set)) < 32)
1125 /* There is no problem if the second memory access has a data
1126 dependency on the first single-cycle load. */
1127 rtx x = SET_DEST (set);
1129 next = next_active_insn (insn);
1130 if (!next)
1131 break;
1132 /* If the insn is a branch, then it cannot be problematic. */
1133 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1134 continue;
1136 /* Look for a second memory access to/from an integer register. */
1137 if ((set = single_set (next)) != NULL_RTX)
1139 rtx src = SET_SRC (set);
1140 rtx dest = SET_DEST (set);
1141 rtx mem;
1143 /* LDD is affected. */
1144 if ((mem = mem_ref (src)) != NULL_RTX
1145 && REG_P (dest)
1146 && REGNO (dest) < 32
1147 && !reg_mentioned_p (x, XEXP (mem, 0)))
1148 insert_nop = true;
1150 /* STD is *not* affected. */
1151 else if (MEM_P (dest)
1152 && GET_MODE_SIZE (GET_MODE (dest)) <= 4
1153 && (src == CONST0_RTX (GET_MODE (dest))
1154 || (REG_P (src)
1155 && REGNO (src) < 32
1156 && REGNO (src) != REGNO (x)))
1157 && !reg_mentioned_p (x, XEXP (dest, 0)))
1158 insert_nop = true;
1162 /* Look for a single-word load/operation into an FP register. */
1163 else if (sparc_fix_ut699
1164 && NONJUMP_INSN_P (insn)
1165 && (set = single_set (insn)) != NULL_RTX
1166 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1167 && REG_P (SET_DEST (set))
1168 && REGNO (SET_DEST (set)) > 31)
1170 /* Number of instructions in the problematic window. */
1171 const int n_insns = 4;
1172 /* The problematic combination is with the sibling FP register. */
1173 const unsigned int x = REGNO (SET_DEST (set));
1174 const unsigned int y = x ^ 1;
1175 rtx_insn *after;
1176 int i;
1178 next = next_active_insn (insn);
1179 if (!next)
1180 break;
1181 /* If the insn is a branch, then it cannot be problematic. */
1182 if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
1183 continue;
1185 /* Look for a second load/operation into the sibling FP register. */
1186 if (!((set = single_set (next)) != NULL_RTX
1187 && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
1188 && REG_P (SET_DEST (set))
1189 && REGNO (SET_DEST (set)) == y))
1190 continue;
1192 /* Look for a (possible) store from the FP register in the next N
1193 instructions, but bail out if it is again modified or if there
1194 is a store from the sibling FP register before this store. */
1195 for (after = next, i = 0; i < n_insns; i++)
1197 bool branch_p;
1199 after = next_active_insn (after);
1200 if (!after)
1201 break;
1203 /* This is a branch with an empty delay slot. */
1204 if (!NONJUMP_INSN_P (after))
1206 if (++i == n_insns)
1207 break;
1208 branch_p = true;
1209 after = NULL;
1211 /* This is a branch with a filled delay slot. */
1212 else if (rtx_sequence *seq =
1213 dyn_cast <rtx_sequence *> (PATTERN (after)))
1215 if (++i == n_insns)
1216 break;
1217 branch_p = true;
1218 after = seq->insn (1);
1220 /* This is a regular instruction. */
1221 else
1222 branch_p = false;
1224 if (after && (set = single_set (after)) != NULL_RTX)
1226 const rtx src = SET_SRC (set);
1227 const rtx dest = SET_DEST (set);
1228 const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));
1230 /* If the FP register is again modified before the store,
1231 then the store isn't affected. */
1232 if (REG_P (dest)
1233 && (REGNO (dest) == x
1234 || (REGNO (dest) == y && size == 8)))
1235 break;
1237 if (MEM_P (dest) && REG_P (src))
1239 /* If there is a store from the sibling FP register
1240 before the store, then the store is not affected. */
1241 if (REGNO (src) == y || (REGNO (src) == x && size == 8))
1242 break;
1244 /* Otherwise, the store is affected. */
1245 if (REGNO (src) == x && size == 4)
1247 insert_nop = true;
1248 break;
1253 /* If we have a branch in the first M instructions, then we
1254 cannot see the (M+2)th instruction so we play safe. */
1255 if (branch_p && i <= (n_insns - 2))
1257 insert_nop = true;
1258 break;
1263 else
1264 next = NEXT_INSN (insn);
1266 if (insert_nop)
1267 emit_insn_before (gen_nop (), next);
1270 return 0;
1273 namespace {
1275 const pass_data pass_data_work_around_errata =
1277 RTL_PASS, /* type */
1278 "errata", /* name */
1279 OPTGROUP_NONE, /* optinfo_flags */
1280 TV_MACH_DEP, /* tv_id */
1281 0, /* properties_required */
1282 0, /* properties_provided */
1283 0, /* properties_destroyed */
1284 0, /* todo_flags_start */
1285 0, /* todo_flags_finish */
1288 class pass_work_around_errata : public rtl_opt_pass
1290 public:
1291 pass_work_around_errata(gcc::context *ctxt)
1292 : rtl_opt_pass(pass_data_work_around_errata, ctxt)
1295 /* opt_pass methods: */
1296 virtual bool gate (function *)
1298 return sparc_fix_at697f || sparc_fix_ut699 || sparc_fix_b2bst;
1301 virtual unsigned int execute (function *)
1303 return sparc_do_work_around_errata ();
1306 }; // class pass_work_around_errata
1308 } // anon namespace
1310 rtl_opt_pass *
1311 make_pass_work_around_errata (gcc::context *ctxt)
1313 return new pass_work_around_errata (ctxt);
1316 /* Helpers for TARGET_DEBUG_OPTIONS. */
1317 static void
1318 dump_target_flag_bits (const int flags)
1320 if (flags & MASK_64BIT)
1321 fprintf (stderr, "64BIT ");
1322 if (flags & MASK_APP_REGS)
1323 fprintf (stderr, "APP_REGS ");
1324 if (flags & MASK_FASTER_STRUCTS)
1325 fprintf (stderr, "FASTER_STRUCTS ");
1326 if (flags & MASK_FLAT)
1327 fprintf (stderr, "FLAT ");
1328 if (flags & MASK_FMAF)
1329 fprintf (stderr, "FMAF ");
1330 if (flags & MASK_FSMULD)
1331 fprintf (stderr, "FSMULD ");
1332 if (flags & MASK_FPU)
1333 fprintf (stderr, "FPU ");
1334 if (flags & MASK_HARD_QUAD)
1335 fprintf (stderr, "HARD_QUAD ");
1336 if (flags & MASK_POPC)
1337 fprintf (stderr, "POPC ");
1338 if (flags & MASK_PTR64)
1339 fprintf (stderr, "PTR64 ");
1340 if (flags & MASK_STACK_BIAS)
1341 fprintf (stderr, "STACK_BIAS ");
1342 if (flags & MASK_UNALIGNED_DOUBLES)
1343 fprintf (stderr, "UNALIGNED_DOUBLES ");
1344 if (flags & MASK_V8PLUS)
1345 fprintf (stderr, "V8PLUS ");
1346 if (flags & MASK_VIS)
1347 fprintf (stderr, "VIS ");
1348 if (flags & MASK_VIS2)
1349 fprintf (stderr, "VIS2 ");
1350 if (flags & MASK_VIS3)
1351 fprintf (stderr, "VIS3 ");
1352 if (flags & MASK_VIS4)
1353 fprintf (stderr, "VIS4 ");
1354 if (flags & MASK_VIS4B)
1355 fprintf (stderr, "VIS4B ");
1356 if (flags & MASK_CBCOND)
1357 fprintf (stderr, "CBCOND ");
1358 if (flags & MASK_DEPRECATED_V8_INSNS)
1359 fprintf (stderr, "DEPRECATED_V8_INSNS ");
1360 if (flags & MASK_SPARCLET)
1361 fprintf (stderr, "SPARCLET ");
1362 if (flags & MASK_SPARCLITE)
1363 fprintf (stderr, "SPARCLITE ");
1364 if (flags & MASK_V8)
1365 fprintf (stderr, "V8 ");
1366 if (flags & MASK_V9)
1367 fprintf (stderr, "V9 ");
1370 static void
1371 dump_target_flags (const char *prefix, const int flags)
1373 fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
1374 dump_target_flag_bits (flags);
1375 fprintf(stderr, "]\n");
1378 /* Validate and override various options, and do some machine dependent
1379 initialization. */
1381 static void
1382 sparc_option_override (void)
1384 static struct code_model {
1385 const char *const name;
1386 const enum cmodel value;
1387 } const cmodels[] = {
1388 { "32", CM_32 },
1389 { "medlow", CM_MEDLOW },
1390 { "medmid", CM_MEDMID },
1391 { "medany", CM_MEDANY },
1392 { "embmedany", CM_EMBMEDANY },
1393 { NULL, (enum cmodel) 0 }
1395 const struct code_model *cmodel;
1396 /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=. */
1397 static struct cpu_default {
1398 const int cpu;
1399 const enum processor_type processor;
1400 } const cpu_default[] = {
1401 /* There must be one entry here for each TARGET_CPU value. */
1402 { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
1403 { TARGET_CPU_v8, PROCESSOR_V8 },
1404 { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
1405 { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
1406 { TARGET_CPU_leon, PROCESSOR_LEON },
1407 { TARGET_CPU_leon3, PROCESSOR_LEON3 },
1408 { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
1409 { TARGET_CPU_sparclite, PROCESSOR_F930 },
1410 { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
1411 { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
1412 { TARGET_CPU_v9, PROCESSOR_V9 },
1413 { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
1414 { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
1415 { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
1416 { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
1417 { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
1418 { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
1419 { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
1420 { TARGET_CPU_m8, PROCESSOR_M8 },
1421 { -1, PROCESSOR_V7 }
1423 const struct cpu_default *def;
1424 /* Table of values for -m{cpu,tune}=. This must match the order of
1425 the enum processor_type in sparc-opts.h. */
1426 static struct cpu_table {
1427 const char *const name;
1428 const int disable;
1429 const int enable;
1430 } const cpu_table[] = {
1431 { "v7", MASK_ISA|MASK_FSMULD, 0 },
1432 { "cypress", MASK_ISA|MASK_FSMULD, 0 },
1433 { "v8", MASK_ISA, MASK_V8 },
1434 /* TI TMS390Z55 supersparc */
1435 { "supersparc", MASK_ISA, MASK_V8 },
1436 { "hypersparc", MASK_ISA, MASK_V8 },
1437 { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
1438 { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
1439 { "leon3v7", MASK_ISA|MASK_FSMULD, MASK_LEON3 },
1440 { "sparclite", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1441 /* The Fujitsu MB86930 is the original sparclite chip, with no FPU. */
1442 { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1443 /* The Fujitsu MB86934 is the recent sparclite chip, with an FPU. */
1444 { "f934", MASK_ISA|MASK_FSMULD, MASK_SPARCLITE },
1445 { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
1446 { "sparclet", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1447 /* TEMIC sparclet */
1448 { "tsc701", MASK_ISA|MASK_FSMULD, MASK_SPARCLET },
1449 { "v9", MASK_ISA, MASK_V9 },
1450 /* UltraSPARC I, II, IIi */
1451 { "ultrasparc", MASK_ISA,
1452 /* Although insns using %y are deprecated, it is a clear win. */
1453 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1454 /* UltraSPARC III */
1455 /* ??? Check if %y issue still holds true. */
1456 { "ultrasparc3", MASK_ISA,
1457 MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
1458 /* UltraSPARC T1 */
1459 { "niagara", MASK_ISA,
1460 MASK_V9|MASK_DEPRECATED_V8_INSNS },
1461 /* UltraSPARC T2 */
1462 { "niagara2", MASK_ISA,
1463 MASK_V9|MASK_POPC|MASK_VIS2 },
1464 /* UltraSPARC T3 */
1465 { "niagara3", MASK_ISA,
1466 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
1467 /* UltraSPARC T4 */
1468 { "niagara4", MASK_ISA,
1469 MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
1470 /* UltraSPARC M7 */
1471 { "niagara7", MASK_ISA,
1472 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
1473 /* UltraSPARC M8 */
1474 { "m8", MASK_ISA,
1475 MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC|MASK_VIS4B }
1477 const struct cpu_table *cpu;
1478 unsigned int i;
1480 if (sparc_debug_string != NULL)
1482 const char *q;
1483 char *p;
1485 p = ASTRDUP (sparc_debug_string);
1486 while ((q = strtok (p, ",")) != NULL)
1488 bool invert;
1489 int mask;
1491 p = NULL;
1492 if (*q == '!')
1494 invert = true;
1495 q++;
1497 else
1498 invert = false;
1500 if (! strcmp (q, "all"))
1501 mask = MASK_DEBUG_ALL;
1502 else if (! strcmp (q, "options"))
1503 mask = MASK_DEBUG_OPTIONS;
1504 else
1505 error ("unknown -mdebug-%s switch", q);
1507 if (invert)
1508 sparc_debug &= ~mask;
1509 else
1510 sparc_debug |= mask;
1514 /* Enable the FsMULd instruction by default if not explicitly specified by
1515 the user. It may be later disabled by the CPU (explicitly or not). */
1516 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1517 target_flags |= MASK_FSMULD;
1519 if (TARGET_DEBUG_OPTIONS)
1521 dump_target_flags("Initial target_flags", target_flags);
1522 dump_target_flags("target_flags_explicit", target_flags_explicit);
1525 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1526 SUBTARGET_OVERRIDE_OPTIONS;
1527 #endif
1529 #ifndef SPARC_BI_ARCH
1530 /* Check for unsupported architecture size. */
1531 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1532 error ("%s is not supported by this configuration",
1533 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1534 #endif
1536 /* We force all 64bit archs to use 128 bit long double */
1537 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1539 error ("-mlong-double-64 not allowed with -m64");
1540 target_flags |= MASK_LONG_DOUBLE_128;
1543 /* Code model selection. */
1544 sparc_cmodel = SPARC_DEFAULT_CMODEL;
1546 #ifdef SPARC_BI_ARCH
1547 if (TARGET_ARCH32)
1548 sparc_cmodel = CM_32;
1549 #endif
1551 if (sparc_cmodel_string != NULL)
1553 if (TARGET_ARCH64)
1555 for (cmodel = &cmodels[0]; cmodel->name; cmodel++)
1556 if (strcmp (sparc_cmodel_string, cmodel->name) == 0)
1557 break;
1558 if (cmodel->name == NULL)
1559 error ("bad value (%s) for -mcmodel= switch", sparc_cmodel_string);
1560 else
1561 sparc_cmodel = cmodel->value;
1563 else
1564 error ("-mcmodel= is not supported on 32-bit systems");
1567 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1568 for (i = 8; i < 16; i++)
1569 if (!call_used_regs [i])
1571 error ("-fcall-saved-REG is not supported for out registers");
1572 call_used_regs [i] = 1;
1575 /* Set the default CPU if no -mcpu option was specified. */
1576 if (!global_options_set.x_sparc_cpu_and_features)
1578 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1579 if (def->cpu == TARGET_CPU_DEFAULT)
1580 break;
1581 gcc_assert (def->cpu != -1);
1582 sparc_cpu_and_features = def->processor;
1585 /* Set the default CPU if no -mtune option was specified. */
1586 if (!global_options_set.x_sparc_cpu)
1587 sparc_cpu = sparc_cpu_and_features;
1589 cpu = &cpu_table[(int) sparc_cpu_and_features];
1591 if (TARGET_DEBUG_OPTIONS)
1593 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1594 dump_target_flags ("cpu->disable", cpu->disable);
1595 dump_target_flags ("cpu->enable", cpu->enable);
1598 target_flags &= ~cpu->disable;
1599 target_flags |= (cpu->enable
1600 #ifndef HAVE_AS_FMAF_HPC_VIS3
1601 & ~(MASK_FMAF | MASK_VIS3)
1602 #endif
1603 #ifndef HAVE_AS_SPARC4
1604 & ~MASK_CBCOND
1605 #endif
1606 #ifndef HAVE_AS_SPARC5_VIS4
1607 & ~(MASK_VIS4 | MASK_SUBXC)
1608 #endif
1609 #ifndef HAVE_AS_SPARC6
1610 & ~(MASK_VIS4B)
1611 #endif
1612 #ifndef HAVE_AS_LEON
1613 & ~(MASK_LEON | MASK_LEON3)
1614 #endif
1615 & ~(target_flags_explicit & MASK_FEATURES)
1618 /* -mvis2 implies -mvis. */
1619 if (TARGET_VIS2)
1620 target_flags |= MASK_VIS;
1622 /* -mvis3 implies -mvis2 and -mvis. */
1623 if (TARGET_VIS3)
1624 target_flags |= MASK_VIS2 | MASK_VIS;
1626 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1627 if (TARGET_VIS4)
1628 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1630 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1631 if (TARGET_VIS4B)
1632 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1634 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1635 FPU is disabled. */
1636 if (!TARGET_FPU)
1637 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1638 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1640 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1641 are available; -m64 also implies v9. */
1642 if (TARGET_VIS || TARGET_ARCH64)
1644 target_flags |= MASK_V9;
1645 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1648 /* -mvis also implies -mv8plus on 32-bit. */
1649 if (TARGET_VIS && !TARGET_ARCH64)
1650 target_flags |= MASK_V8PLUS;
1652 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1653 if (TARGET_V9 && TARGET_ARCH32)
1654 target_flags |= MASK_DEPRECATED_V8_INSNS;
1656 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1657 if (!TARGET_V9 || TARGET_ARCH64)
1658 target_flags &= ~MASK_V8PLUS;
1660 /* Don't use stack biasing in 32-bit mode. */
1661 if (TARGET_ARCH32)
1662 target_flags &= ~MASK_STACK_BIAS;
1664 /* Use LRA instead of reload, unless otherwise instructed. */
1665 if (!(target_flags_explicit & MASK_LRA))
1666 target_flags |= MASK_LRA;
1668 /* Enable the back-to-back store errata workaround for LEON3FT. */
1669 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1670 sparc_fix_b2bst = 1;
1672 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1673 if (sparc_fix_ut699)
1674 target_flags &= ~MASK_FSMULD;
1676 /* Supply a default value for align_functions. */
1677 if (align_functions == 0)
1679 if (sparc_cpu == PROCESSOR_ULTRASPARC
1680 || sparc_cpu == PROCESSOR_ULTRASPARC3
1681 || sparc_cpu == PROCESSOR_NIAGARA
1682 || sparc_cpu == PROCESSOR_NIAGARA2
1683 || sparc_cpu == PROCESSOR_NIAGARA3
1684 || sparc_cpu == PROCESSOR_NIAGARA4)
1685 align_functions = 32;
1686 else if (sparc_cpu == PROCESSOR_NIAGARA7
1687 || sparc_cpu == PROCESSOR_M8)
1688 align_functions = 64;
1691 /* Validate PCC_STRUCT_RETURN. */
1692 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1693 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1695 /* Only use .uaxword when compiling for a 64-bit target. */
1696 if (!TARGET_ARCH64)
1697 targetm.asm_out.unaligned_op.di = NULL;
1699 /* Do various machine dependent initializations. */
1700 sparc_init_modes ();
1702 /* Set up function hooks. */
1703 init_machine_status = sparc_init_machine_status;
1705 switch (sparc_cpu)
1707 case PROCESSOR_V7:
1708 case PROCESSOR_CYPRESS:
1709 sparc_costs = &cypress_costs;
1710 break;
1711 case PROCESSOR_V8:
1712 case PROCESSOR_SPARCLITE:
1713 case PROCESSOR_SUPERSPARC:
1714 sparc_costs = &supersparc_costs;
1715 break;
1716 case PROCESSOR_F930:
1717 case PROCESSOR_F934:
1718 case PROCESSOR_HYPERSPARC:
1719 case PROCESSOR_SPARCLITE86X:
1720 sparc_costs = &hypersparc_costs;
1721 break;
1722 case PROCESSOR_LEON:
1723 sparc_costs = &leon_costs;
1724 break;
1725 case PROCESSOR_LEON3:
1726 case PROCESSOR_LEON3V7:
1727 sparc_costs = &leon3_costs;
1728 break;
1729 case PROCESSOR_SPARCLET:
1730 case PROCESSOR_TSC701:
1731 sparc_costs = &sparclet_costs;
1732 break;
1733 case PROCESSOR_V9:
1734 case PROCESSOR_ULTRASPARC:
1735 sparc_costs = &ultrasparc_costs;
1736 break;
1737 case PROCESSOR_ULTRASPARC3:
1738 sparc_costs = &ultrasparc3_costs;
1739 break;
1740 case PROCESSOR_NIAGARA:
1741 sparc_costs = &niagara_costs;
1742 break;
1743 case PROCESSOR_NIAGARA2:
1744 sparc_costs = &niagara2_costs;
1745 break;
1746 case PROCESSOR_NIAGARA3:
1747 sparc_costs = &niagara3_costs;
1748 break;
1749 case PROCESSOR_NIAGARA4:
1750 sparc_costs = &niagara4_costs;
1751 break;
1752 case PROCESSOR_NIAGARA7:
1753 sparc_costs = &niagara7_costs;
1754 break;
1755 case PROCESSOR_M8:
1756 sparc_costs = &m8_costs;
1757 break;
1758 case PROCESSOR_NATIVE:
1759 gcc_unreachable ();
1762 if (sparc_memory_model == SMM_DEFAULT)
1764 /* Choose the memory model for the operating system. */
1765 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1766 if (os_default != SMM_DEFAULT)
1767 sparc_memory_model = os_default;
1768 /* Choose the most relaxed model for the processor. */
1769 else if (TARGET_V9)
1770 sparc_memory_model = SMM_RMO;
1771 else if (TARGET_LEON3)
1772 sparc_memory_model = SMM_TSO;
1773 else if (TARGET_LEON)
1774 sparc_memory_model = SMM_SC;
1775 else if (TARGET_V8)
1776 sparc_memory_model = SMM_PSO;
1777 else
1778 sparc_memory_model = SMM_SC;
1781 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1782 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1783 target_flags |= MASK_LONG_DOUBLE_128;
1784 #endif
1786 if (TARGET_DEBUG_OPTIONS)
1787 dump_target_flags ("Final target_flags", target_flags);
1789 /* PARAM_SIMULTANEOUS_PREFETCHES is the number of prefetches that
1790 can run at the same time. More important, it is the threshold
1791 defining when additional prefetches will be dropped by the
1792 hardware.
1794 The UltraSPARC-III features a documented prefetch queue with a
1795 size of 8. Additional prefetches issued in the cpu are
1796 dropped.
1798 Niagara processors are different. In these processors prefetches
1799 are handled much like regular loads. The L1 miss buffer is 32
1800 entries, but prefetches start getting affected when 30 entries
1801 become occupied. That occupation could be a mix of regular loads
1802 and prefetches though. And that buffer is shared by all threads.
1803 Once the threshold is reached, if the core is running a single
1804 thread the prefetch will retry. If more than one thread is
1805 running, the prefetch will be dropped.
1807 All this makes it very difficult to determine how many
1808 simultaneous prefetches can be issued simultaneously, even in a
1809 single-threaded program. Experimental results show that setting
1810 this parameter to 32 works well when the number of threads is not
1811 high. */
1812 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
1813 ((sparc_cpu == PROCESSOR_ULTRASPARC
1814 || sparc_cpu == PROCESSOR_NIAGARA
1815 || sparc_cpu == PROCESSOR_NIAGARA2
1816 || sparc_cpu == PROCESSOR_NIAGARA3
1817 || sparc_cpu == PROCESSOR_NIAGARA4)
1819 : (sparc_cpu == PROCESSOR_ULTRASPARC3
1820 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
1821 || sparc_cpu == PROCESSOR_M8)
1822 ? 32 : 3))),
1823 global_options.x_param_values,
1824 global_options_set.x_param_values);
1826 /* PARAM_L1_CACHE_LINE_SIZE is the size of the L1 cache line, in
1827 bytes.
1829 The Oracle SPARC Architecture (previously the UltraSPARC
1830 Architecture) specification states that when a PREFETCH[A]
1831 instruction is executed an implementation-specific amount of data
1832 is prefetched, and that it is at least 64 bytes long (aligned to
1833 at least 64 bytes).
1835 However, this is not correct. The M7 (and implementations prior
1836 to that) does not guarantee a 64B prefetch into a cache if the
1837 line size is smaller. A single cache line is all that is ever
1838 prefetched. So for the M7, where the L1D$ has 32B lines and the
1839 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
1840 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
1841 is a read_n prefetch, which is the only type which allocates to
1842 the L1.) */
1843 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
1844 (sparc_cpu == PROCESSOR_M8
1845 ? 64 : 32),
1846 global_options.x_param_values,
1847 global_options_set.x_param_values);
1849 /* PARAM_L1_CACHE_SIZE is the size of the L1D$ (most SPARC chips use
1850 Hardvard level-1 caches) in kilobytes. Both UltraSPARC and
1851 Niagara processors feature a L1D$ of 16KB. */
1852 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
1853 ((sparc_cpu == PROCESSOR_ULTRASPARC
1854 || sparc_cpu == PROCESSOR_ULTRASPARC3
1855 || sparc_cpu == PROCESSOR_NIAGARA
1856 || sparc_cpu == PROCESSOR_NIAGARA2
1857 || sparc_cpu == PROCESSOR_NIAGARA3
1858 || sparc_cpu == PROCESSOR_NIAGARA4
1859 || sparc_cpu == PROCESSOR_NIAGARA7
1860 || sparc_cpu == PROCESSOR_M8)
1861 ? 16 : 64),
1862 global_options.x_param_values,
1863 global_options_set.x_param_values);
1866 /* PARAM_L2_CACHE_SIZE is the size fo the L2 in kilobytes. Note
1867 that 512 is the default in params.def. */
1868 maybe_set_param_value (PARAM_L2_CACHE_SIZE,
1869 ((sparc_cpu == PROCESSOR_NIAGARA4
1870 || sparc_cpu == PROCESSOR_M8)
1871 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
1872 ? 256 : 512)),
1873 global_options.x_param_values,
1874 global_options_set.x_param_values);
1877 /* Disable save slot sharing for call-clobbered registers by default.
1878 The IRA sharing algorithm works on single registers only and this
1879 pessimizes for double floating-point registers. */
1880 if (!global_options_set.x_flag_ira_share_save_slots)
1881 flag_ira_share_save_slots = 0;
1883 /* Only enable REE by default in 64-bit mode where it helps to eliminate
1884 redundant 32-to-64-bit extensions. */
1885 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
1886 flag_ree = 0;
1889 /* Miscellaneous utilities. */
1891 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
1892 or branch on register contents instructions. */
1895 v9_regcmp_p (enum rtx_code code)
1897 return (code == EQ || code == NE || code == GE || code == LT
1898 || code == LE || code == GT);
1901 /* Nonzero if OP is a floating point constant which can
1902 be loaded into an integer register using a single
1903 sethi instruction. */
1906 fp_sethi_p (rtx op)
1908 if (GET_CODE (op) == CONST_DOUBLE)
1910 long i;
1912 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1913 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
1916 return 0;
1919 /* Nonzero if OP is a floating point constant which can
1920 be loaded into an integer register using a single
1921 mov instruction. */
1924 fp_mov_p (rtx op)
1926 if (GET_CODE (op) == CONST_DOUBLE)
1928 long i;
1930 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1931 return SPARC_SIMM13_P (i);
1934 return 0;
1937 /* Nonzero if OP is a floating point constant which can
1938 be loaded into an integer register using a high/losum
1939 instruction sequence. */
1942 fp_high_losum_p (rtx op)
1944 /* The constraints calling this should only be in
1945 SFmode move insns, so any constant which cannot
1946 be moved using a single insn will do. */
1947 if (GET_CODE (op) == CONST_DOUBLE)
1949 long i;
1951 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
1952 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
1955 return 0;
1958 /* Return true if the address of LABEL can be loaded by means of the
1959 mov{si,di}_pic_label_ref patterns in PIC mode. */
1961 static bool
1962 can_use_mov_pic_label_ref (rtx label)
1964 /* VxWorks does not impose a fixed gap between segments; the run-time
1965 gap can be different from the object-file gap. We therefore can't
1966 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
1967 are absolutely sure that X is in the same segment as the GOT.
1968 Unfortunately, the flexibility of linker scripts means that we
1969 can't be sure of that in general, so assume that GOT-relative
1970 accesses are never valid on VxWorks. */
1971 if (TARGET_VXWORKS_RTP)
1972 return false;
1974 /* Similarly, if the label is non-local, it might end up being placed
1975 in a different section than the current one; now mov_pic_label_ref
1976 requires the label and the code to be in the same section. */
1977 if (LABEL_REF_NONLOCAL_P (label))
1978 return false;
1980 /* Finally, if we are reordering basic blocks and partition into hot
1981 and cold sections, this might happen for any label. */
1982 if (flag_reorder_blocks_and_partition)
1983 return false;
1985 return true;
1988 /* Expand a move instruction. Return true if all work is done. */
1990 bool
1991 sparc_expand_move (machine_mode mode, rtx *operands)
1993 /* Handle sets of MEM first. */
1994 if (GET_CODE (operands[0]) == MEM)
1996 /* 0 is a register (or a pair of registers) on SPARC. */
1997 if (register_or_zero_operand (operands[1], mode))
1998 return false;
2000 if (!reload_in_progress)
2002 operands[0] = validize_mem (operands[0]);
2003 operands[1] = force_reg (mode, operands[1]);
2007 /* Fixup TLS cases. */
2008 if (TARGET_HAVE_TLS
2009 && CONSTANT_P (operands[1])
2010 && sparc_tls_referenced_p (operands [1]))
2012 operands[1] = sparc_legitimize_tls_address (operands[1]);
2013 return false;
2016 /* Fixup PIC cases. */
2017 if (flag_pic && CONSTANT_P (operands[1]))
2019 if (pic_address_needs_scratch (operands[1]))
2020 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2022 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2023 if (GET_CODE (operands[1]) == LABEL_REF
2024 && can_use_mov_pic_label_ref (operands[1]))
2026 if (mode == SImode)
2028 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2029 return true;
2032 if (mode == DImode)
2034 gcc_assert (TARGET_ARCH64);
2035 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2036 return true;
2040 if (symbolic_operand (operands[1], mode))
2042 operands[1]
2043 = sparc_legitimize_pic_address (operands[1],
2044 reload_in_progress
2045 ? operands[0] : NULL_RTX);
2046 return false;
2050 /* If we are trying to toss an integer constant into FP registers,
2051 or loading a FP or vector constant, force it into memory. */
2052 if (CONSTANT_P (operands[1])
2053 && REG_P (operands[0])
2054 && (SPARC_FP_REG_P (REGNO (operands[0]))
2055 || SCALAR_FLOAT_MODE_P (mode)
2056 || VECTOR_MODE_P (mode)))
2058 /* emit_group_store will send such bogosity to us when it is
2059 not storing directly into memory. So fix this up to avoid
2060 crashes in output_constant_pool. */
2061 if (operands [1] == const0_rtx)
2062 operands[1] = CONST0_RTX (mode);
2064 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2065 always other regs. */
2066 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2067 && (const_zero_operand (operands[1], mode)
2068 || const_all_ones_operand (operands[1], mode)))
2069 return false;
2071 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2072 /* We are able to build any SF constant in integer registers
2073 with at most 2 instructions. */
2074 && (mode == SFmode
2075 /* And any DF constant in integer registers if needed. */
2076 || (mode == DFmode && !can_create_pseudo_p ())))
2077 return false;
2079 operands[1] = force_const_mem (mode, operands[1]);
2080 if (!reload_in_progress)
2081 operands[1] = validize_mem (operands[1]);
2082 return false;
2085 /* Accept non-constants and valid constants unmodified. */
2086 if (!CONSTANT_P (operands[1])
2087 || GET_CODE (operands[1]) == HIGH
2088 || input_operand (operands[1], mode))
2089 return false;
2091 switch (mode)
2093 case E_QImode:
2094 /* All QImode constants require only one insn, so proceed. */
2095 break;
2097 case E_HImode:
2098 case E_SImode:
2099 sparc_emit_set_const32 (operands[0], operands[1]);
2100 return true;
2102 case E_DImode:
2103 /* input_operand should have filtered out 32-bit mode. */
2104 sparc_emit_set_const64 (operands[0], operands[1]);
2105 return true;
2107 case E_TImode:
2109 rtx high, low;
2110 /* TImode isn't available in 32-bit mode. */
2111 split_double (operands[1], &high, &low);
2112 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2113 high));
2114 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2115 low));
2117 return true;
2119 default:
2120 gcc_unreachable ();
2123 return false;
2126 /* Load OP1, a 32-bit constant, into OP0, a register.
2127 We know it can't be done in one insn when we get
2128 here, the move expander guarantees this. */
2130 static void
2131 sparc_emit_set_const32 (rtx op0, rtx op1)
2133 machine_mode mode = GET_MODE (op0);
2134 rtx temp = op0;
2136 if (can_create_pseudo_p ())
2137 temp = gen_reg_rtx (mode);
2139 if (GET_CODE (op1) == CONST_INT)
2141 gcc_assert (!small_int_operand (op1, mode)
2142 && !const_high_operand (op1, mode));
2144 /* Emit them as real moves instead of a HIGH/LO_SUM,
2145 this way CSE can see everything and reuse intermediate
2146 values if it wants. */
2147 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2148 & ~(HOST_WIDE_INT) 0x3ff)));
2150 emit_insn (gen_rtx_SET (op0,
2151 gen_rtx_IOR (mode, temp,
2152 GEN_INT (INTVAL (op1) & 0x3ff))));
2154 else
2156 /* A symbol, emit in the traditional way. */
2157 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2158 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
2162 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2163 If TEMP is nonzero, we are forbidden to use any other scratch
2164 registers. Otherwise, we are allowed to generate them as needed.
2166 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2167 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2169 void
2170 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2172 rtx cst, temp1, temp2, temp3, temp4, temp5;
2173 rtx ti_temp = 0;
2175 /* Deal with too large offsets. */
2176 if (GET_CODE (op1) == CONST
2177 && GET_CODE (XEXP (op1, 0)) == PLUS
2178 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2179 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2181 gcc_assert (!temp);
2182 temp1 = gen_reg_rtx (DImode);
2183 temp2 = gen_reg_rtx (DImode);
2184 sparc_emit_set_const64 (temp2, cst);
2185 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2186 NULL_RTX);
2187 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2188 return;
2191 if (temp && GET_MODE (temp) == TImode)
2193 ti_temp = temp;
2194 temp = gen_rtx_REG (DImode, REGNO (temp));
2197 /* SPARC-V9 code-model support. */
2198 switch (sparc_cmodel)
2200 case CM_MEDLOW:
2201 /* The range spanned by all instructions in the object is less
2202 than 2^31 bytes (2GB) and the distance from any instruction
2203 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2204 than 2^31 bytes (2GB).
2206 The executable must be in the low 4TB of the virtual address
2207 space.
2209 sethi %hi(symbol), %temp1
2210 or %temp1, %lo(symbol), %reg */
2211 if (temp)
2212 temp1 = temp; /* op0 is allowed. */
2213 else
2214 temp1 = gen_reg_rtx (DImode);
2216 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2217 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2218 break;
2220 case CM_MEDMID:
2221 /* The range spanned by all instructions in the object is less
2222 than 2^31 bytes (2GB) and the distance from any instruction
2223 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2224 than 2^31 bytes (2GB).
2226 The executable must be in the low 16TB of the virtual address
2227 space.
2229 sethi %h44(symbol), %temp1
2230 or %temp1, %m44(symbol), %temp2
2231 sllx %temp2, 12, %temp3
2232 or %temp3, %l44(symbol), %reg */
2233 if (temp)
2235 temp1 = op0;
2236 temp2 = op0;
2237 temp3 = temp; /* op0 is allowed. */
2239 else
2241 temp1 = gen_reg_rtx (DImode);
2242 temp2 = gen_reg_rtx (DImode);
2243 temp3 = gen_reg_rtx (DImode);
2246 emit_insn (gen_seth44 (temp1, op1));
2247 emit_insn (gen_setm44 (temp2, temp1, op1));
2248 emit_insn (gen_rtx_SET (temp3,
2249 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2250 emit_insn (gen_setl44 (op0, temp3, op1));
2251 break;
2253 case CM_MEDANY:
2254 /* The range spanned by all instructions in the object is less
2255 than 2^31 bytes (2GB) and the distance from any instruction
2256 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2257 than 2^31 bytes (2GB).
2259 The executable can be placed anywhere in the virtual address
2260 space.
2262 sethi %hh(symbol), %temp1
2263 sethi %lm(symbol), %temp2
2264 or %temp1, %hm(symbol), %temp3
2265 sllx %temp3, 32, %temp4
2266 or %temp4, %temp2, %temp5
2267 or %temp5, %lo(symbol), %reg */
2268 if (temp)
2270 /* It is possible that one of the registers we got for operands[2]
2271 might coincide with that of operands[0] (which is why we made
2272 it TImode). Pick the other one to use as our scratch. */
2273 if (rtx_equal_p (temp, op0))
2275 gcc_assert (ti_temp);
2276 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2278 temp1 = op0;
2279 temp2 = temp; /* op0 is _not_ allowed, see above. */
2280 temp3 = op0;
2281 temp4 = op0;
2282 temp5 = op0;
2284 else
2286 temp1 = gen_reg_rtx (DImode);
2287 temp2 = gen_reg_rtx (DImode);
2288 temp3 = gen_reg_rtx (DImode);
2289 temp4 = gen_reg_rtx (DImode);
2290 temp5 = gen_reg_rtx (DImode);
2293 emit_insn (gen_sethh (temp1, op1));
2294 emit_insn (gen_setlm (temp2, op1));
2295 emit_insn (gen_sethm (temp3, temp1, op1));
2296 emit_insn (gen_rtx_SET (temp4,
2297 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2298 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2299 emit_insn (gen_setlo (op0, temp5, op1));
2300 break;
2302 case CM_EMBMEDANY:
2303 /* Old old old backwards compatibility kruft here.
2304 Essentially it is MEDLOW with a fixed 64-bit
2305 virtual base added to all data segment addresses.
2306 Text-segment stuff is computed like MEDANY, we can't
2307 reuse the code above because the relocation knobs
2308 look different.
2310 Data segment: sethi %hi(symbol), %temp1
2311 add %temp1, EMBMEDANY_BASE_REG, %temp2
2312 or %temp2, %lo(symbol), %reg */
2313 if (data_segment_operand (op1, GET_MODE (op1)))
2315 if (temp)
2317 temp1 = temp; /* op0 is allowed. */
2318 temp2 = op0;
2320 else
2322 temp1 = gen_reg_rtx (DImode);
2323 temp2 = gen_reg_rtx (DImode);
2326 emit_insn (gen_embmedany_sethi (temp1, op1));
2327 emit_insn (gen_embmedany_brsum (temp2, temp1));
2328 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2331 /* Text segment: sethi %uhi(symbol), %temp1
2332 sethi %hi(symbol), %temp2
2333 or %temp1, %ulo(symbol), %temp3
2334 sllx %temp3, 32, %temp4
2335 or %temp4, %temp2, %temp5
2336 or %temp5, %lo(symbol), %reg */
2337 else
2339 if (temp)
2341 /* It is possible that one of the registers we got for operands[2]
2342 might coincide with that of operands[0] (which is why we made
2343 it TImode). Pick the other one to use as our scratch. */
2344 if (rtx_equal_p (temp, op0))
2346 gcc_assert (ti_temp);
2347 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2349 temp1 = op0;
2350 temp2 = temp; /* op0 is _not_ allowed, see above. */
2351 temp3 = op0;
2352 temp4 = op0;
2353 temp5 = op0;
2355 else
2357 temp1 = gen_reg_rtx (DImode);
2358 temp2 = gen_reg_rtx (DImode);
2359 temp3 = gen_reg_rtx (DImode);
2360 temp4 = gen_reg_rtx (DImode);
2361 temp5 = gen_reg_rtx (DImode);
2364 emit_insn (gen_embmedany_textuhi (temp1, op1));
2365 emit_insn (gen_embmedany_texthi (temp2, op1));
2366 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2367 emit_insn (gen_rtx_SET (temp4,
2368 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2369 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2370 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2372 break;
2374 default:
2375 gcc_unreachable ();
2379 /* These avoid problems when cross compiling. If we do not
2380 go through all this hair then the optimizer will see
2381 invalid REG_EQUAL notes or in some cases none at all. */
2382 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2383 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2384 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2385 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2387 /* The optimizer is not to assume anything about exactly
2388 which bits are set for a HIGH, they are unspecified.
2389 Unfortunately this leads to many missed optimizations
2390 during CSE. We mask out the non-HIGH bits, and matches
2391 a plain movdi, to alleviate this problem. */
2392 static rtx
2393 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2395 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2398 static rtx
2399 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2401 return gen_rtx_SET (dest, GEN_INT (val));
2404 static rtx
2405 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2407 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2410 static rtx
2411 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2413 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2416 /* Worker routines for 64-bit constant formation on arch64.
2417 One of the key things to be doing in these emissions is
2418 to create as many temp REGs as possible. This makes it
2419 possible for half-built constants to be used later when
2420 such values are similar to something required later on.
2421 Without doing this, the optimizer cannot see such
2422 opportunities. */
2424 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2425 unsigned HOST_WIDE_INT, int);
2427 static void
2428 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2429 unsigned HOST_WIDE_INT low_bits, int is_neg)
2431 unsigned HOST_WIDE_INT high_bits;
2433 if (is_neg)
2434 high_bits = (~low_bits) & 0xffffffff;
2435 else
2436 high_bits = low_bits;
2438 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2439 if (!is_neg)
2441 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2443 else
2445 /* If we are XOR'ing with -1, then we should emit a one's complement
2446 instead. This way the combiner will notice logical operations
2447 such as ANDN later on and substitute. */
2448 if ((low_bits & 0x3ff) == 0x3ff)
2450 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2452 else
2454 emit_insn (gen_rtx_SET (op0,
2455 gen_safe_XOR64 (temp,
2456 (-(HOST_WIDE_INT)0x400
2457 | (low_bits & 0x3ff)))));
2462 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2463 unsigned HOST_WIDE_INT, int);
2465 static void
2466 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2467 unsigned HOST_WIDE_INT high_bits,
2468 unsigned HOST_WIDE_INT low_immediate,
2469 int shift_count)
2471 rtx temp2 = op0;
2473 if ((high_bits & 0xfffffc00) != 0)
2475 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2476 if ((high_bits & ~0xfffffc00) != 0)
2477 emit_insn (gen_rtx_SET (op0,
2478 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2479 else
2480 temp2 = temp;
2482 else
2484 emit_insn (gen_safe_SET64 (temp, high_bits));
2485 temp2 = temp;
2488 /* Now shift it up into place. */
2489 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2490 GEN_INT (shift_count))));
2492 /* If there is a low immediate part piece, finish up by
2493 putting that in as well. */
2494 if (low_immediate != 0)
2495 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2498 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2499 unsigned HOST_WIDE_INT);
2501 /* Full 64-bit constant decomposition. Even though this is the
2502 'worst' case, we still optimize a few things away. */
2503 static void
2504 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2505 unsigned HOST_WIDE_INT high_bits,
2506 unsigned HOST_WIDE_INT low_bits)
2508 rtx sub_temp = op0;
2510 if (can_create_pseudo_p ())
2511 sub_temp = gen_reg_rtx (DImode);
2513 if ((high_bits & 0xfffffc00) != 0)
2515 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2516 if ((high_bits & ~0xfffffc00) != 0)
2517 emit_insn (gen_rtx_SET (sub_temp,
2518 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2519 else
2520 sub_temp = temp;
2522 else
2524 emit_insn (gen_safe_SET64 (temp, high_bits));
2525 sub_temp = temp;
2528 if (can_create_pseudo_p ())
2530 rtx temp2 = gen_reg_rtx (DImode);
2531 rtx temp3 = gen_reg_rtx (DImode);
2532 rtx temp4 = gen_reg_rtx (DImode);
2534 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2535 GEN_INT (32))));
2537 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2538 if ((low_bits & ~0xfffffc00) != 0)
2540 emit_insn (gen_rtx_SET (temp3,
2541 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2542 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2544 else
2546 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2549 else
2551 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2552 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2553 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2554 int to_shift = 12;
2556 /* We are in the middle of reload, so this is really
2557 painful. However we do still make an attempt to
2558 avoid emitting truly stupid code. */
2559 if (low1 != const0_rtx)
2561 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2562 GEN_INT (to_shift))));
2563 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2564 sub_temp = op0;
2565 to_shift = 12;
2567 else
2569 to_shift += 12;
2571 if (low2 != const0_rtx)
2573 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2574 GEN_INT (to_shift))));
2575 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2576 sub_temp = op0;
2577 to_shift = 8;
2579 else
2581 to_shift += 8;
2583 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2584 GEN_INT (to_shift))));
2585 if (low3 != const0_rtx)
2586 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2587 /* phew... */
2591 /* Analyze a 64-bit constant for certain properties. */
2592 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2593 unsigned HOST_WIDE_INT,
2594 int *, int *, int *);
2596 static void
2597 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2598 unsigned HOST_WIDE_INT low_bits,
2599 int *hbsp, int *lbsp, int *abbasp)
2601 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2602 int i;
2604 lowest_bit_set = highest_bit_set = -1;
2605 i = 0;
2608 if ((lowest_bit_set == -1)
2609 && ((low_bits >> i) & 1))
2610 lowest_bit_set = i;
2611 if ((highest_bit_set == -1)
2612 && ((high_bits >> (32 - i - 1)) & 1))
2613 highest_bit_set = (64 - i - 1);
2615 while (++i < 32
2616 && ((highest_bit_set == -1)
2617 || (lowest_bit_set == -1)));
2618 if (i == 32)
2620 i = 0;
2623 if ((lowest_bit_set == -1)
2624 && ((high_bits >> i) & 1))
2625 lowest_bit_set = i + 32;
2626 if ((highest_bit_set == -1)
2627 && ((low_bits >> (32 - i - 1)) & 1))
2628 highest_bit_set = 32 - i - 1;
2630 while (++i < 32
2631 && ((highest_bit_set == -1)
2632 || (lowest_bit_set == -1)));
2634 /* If there are no bits set this should have gone out
2635 as one instruction! */
2636 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2637 all_bits_between_are_set = 1;
2638 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2640 if (i < 32)
2642 if ((low_bits & (1 << i)) != 0)
2643 continue;
2645 else
2647 if ((high_bits & (1 << (i - 32))) != 0)
2648 continue;
2650 all_bits_between_are_set = 0;
2651 break;
2653 *hbsp = highest_bit_set;
2654 *lbsp = lowest_bit_set;
2655 *abbasp = all_bits_between_are_set;
2658 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2660 static int
2661 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2662 unsigned HOST_WIDE_INT low_bits)
2664 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2666 if (high_bits == 0
2667 || high_bits == 0xffffffff)
2668 return 1;
2670 analyze_64bit_constant (high_bits, low_bits,
2671 &highest_bit_set, &lowest_bit_set,
2672 &all_bits_between_are_set);
2674 if ((highest_bit_set == 63
2675 || lowest_bit_set == 0)
2676 && all_bits_between_are_set != 0)
2677 return 1;
2679 if ((highest_bit_set - lowest_bit_set) < 21)
2680 return 1;
2682 return 0;
2685 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2686 unsigned HOST_WIDE_INT,
2687 int, int);
2689 static unsigned HOST_WIDE_INT
2690 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2691 unsigned HOST_WIDE_INT low_bits,
2692 int lowest_bit_set, int shift)
2694 HOST_WIDE_INT hi, lo;
2696 if (lowest_bit_set < 32)
2698 lo = (low_bits >> lowest_bit_set) << shift;
2699 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2701 else
2703 lo = 0;
2704 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2706 gcc_assert (! (hi & lo));
2707 return (hi | lo);
2710 /* Here we are sure to be arch64 and this is an integer constant
2711 being loaded into a register. Emit the most efficient
2712 insn sequence possible. Detection of all the 1-insn cases
2713 has been done already. */
2714 static void
2715 sparc_emit_set_const64 (rtx op0, rtx op1)
2717 unsigned HOST_WIDE_INT high_bits, low_bits;
2718 int lowest_bit_set, highest_bit_set;
2719 int all_bits_between_are_set;
2720 rtx temp = 0;
2722 /* Sanity check that we know what we are working with. */
2723 gcc_assert (TARGET_ARCH64
2724 && (GET_CODE (op0) == SUBREG
2725 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2727 if (! can_create_pseudo_p ())
2728 temp = op0;
2730 if (GET_CODE (op1) != CONST_INT)
2732 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2733 return;
2736 if (! temp)
2737 temp = gen_reg_rtx (DImode);
2739 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2740 low_bits = (INTVAL (op1) & 0xffffffff);
2742 /* low_bits bits 0 --> 31
2743 high_bits bits 32 --> 63 */
2745 analyze_64bit_constant (high_bits, low_bits,
2746 &highest_bit_set, &lowest_bit_set,
2747 &all_bits_between_are_set);
2749 /* First try for a 2-insn sequence. */
2751 /* These situations are preferred because the optimizer can
2752 * do more things with them:
2753 * 1) mov -1, %reg
2754 * sllx %reg, shift, %reg
2755 * 2) mov -1, %reg
2756 * srlx %reg, shift, %reg
2757 * 3) mov some_small_const, %reg
2758 * sllx %reg, shift, %reg
2760 if (((highest_bit_set == 63
2761 || lowest_bit_set == 0)
2762 && all_bits_between_are_set != 0)
2763 || ((highest_bit_set - lowest_bit_set) < 12))
2765 HOST_WIDE_INT the_const = -1;
2766 int shift = lowest_bit_set;
2768 if ((highest_bit_set != 63
2769 && lowest_bit_set != 0)
2770 || all_bits_between_are_set == 0)
2772 the_const =
2773 create_simple_focus_bits (high_bits, low_bits,
2774 lowest_bit_set, 0);
2776 else if (lowest_bit_set == 0)
2777 shift = -(63 - highest_bit_set);
2779 gcc_assert (SPARC_SIMM13_P (the_const));
2780 gcc_assert (shift != 0);
2782 emit_insn (gen_safe_SET64 (temp, the_const));
2783 if (shift > 0)
2784 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
2785 GEN_INT (shift))));
2786 else if (shift < 0)
2787 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
2788 GEN_INT (-shift))));
2789 return;
2792 /* Now a range of 22 or less bits set somewhere.
2793 * 1) sethi %hi(focus_bits), %reg
2794 * sllx %reg, shift, %reg
2795 * 2) sethi %hi(focus_bits), %reg
2796 * srlx %reg, shift, %reg
2798 if ((highest_bit_set - lowest_bit_set) < 21)
2800 unsigned HOST_WIDE_INT focus_bits =
2801 create_simple_focus_bits (high_bits, low_bits,
2802 lowest_bit_set, 10);
2804 gcc_assert (SPARC_SETHI_P (focus_bits));
2805 gcc_assert (lowest_bit_set != 10);
2807 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
2809 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
2810 if (lowest_bit_set < 10)
2811 emit_insn (gen_rtx_SET (op0,
2812 gen_rtx_LSHIFTRT (DImode, temp,
2813 GEN_INT (10 - lowest_bit_set))));
2814 else if (lowest_bit_set > 10)
2815 emit_insn (gen_rtx_SET (op0,
2816 gen_rtx_ASHIFT (DImode, temp,
2817 GEN_INT (lowest_bit_set - 10))));
2818 return;
2821 /* 1) sethi %hi(low_bits), %reg
2822 * or %reg, %lo(low_bits), %reg
2823 * 2) sethi %hi(~low_bits), %reg
2824 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
2826 if (high_bits == 0
2827 || high_bits == 0xffffffff)
2829 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
2830 (high_bits == 0xffffffff));
2831 return;
2834 /* Now, try 3-insn sequences. */
2836 /* 1) sethi %hi(high_bits), %reg
2837 * or %reg, %lo(high_bits), %reg
2838 * sllx %reg, 32, %reg
2840 if (low_bits == 0)
2842 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
2843 return;
2846 /* We may be able to do something quick
2847 when the constant is negated, so try that. */
2848 if (const64_is_2insns ((~high_bits) & 0xffffffff,
2849 (~low_bits) & 0xfffffc00))
2851 /* NOTE: The trailing bits get XOR'd so we need the
2852 non-negated bits, not the negated ones. */
2853 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
2855 if ((((~high_bits) & 0xffffffff) == 0
2856 && ((~low_bits) & 0x80000000) == 0)
2857 || (((~high_bits) & 0xffffffff) == 0xffffffff
2858 && ((~low_bits) & 0x80000000) != 0))
2860 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
2862 if ((SPARC_SETHI_P (fast_int)
2863 && (~high_bits & 0xffffffff) == 0)
2864 || SPARC_SIMM13_P (fast_int))
2865 emit_insn (gen_safe_SET64 (temp, fast_int));
2866 else
2867 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
2869 else
2871 rtx negated_const;
2872 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
2873 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
2874 sparc_emit_set_const64 (temp, negated_const);
2877 /* If we are XOR'ing with -1, then we should emit a one's complement
2878 instead. This way the combiner will notice logical operations
2879 such as ANDN later on and substitute. */
2880 if (trailing_bits == 0x3ff)
2882 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2884 else
2886 emit_insn (gen_rtx_SET (op0,
2887 gen_safe_XOR64 (temp,
2888 (-0x400 | trailing_bits))));
2890 return;
2893 /* 1) sethi %hi(xxx), %reg
2894 * or %reg, %lo(xxx), %reg
2895 * sllx %reg, yyy, %reg
2897 * ??? This is just a generalized version of the low_bits==0
2898 * thing above, FIXME...
2900 if ((highest_bit_set - lowest_bit_set) < 32)
2902 unsigned HOST_WIDE_INT focus_bits =
2903 create_simple_focus_bits (high_bits, low_bits,
2904 lowest_bit_set, 0);
2906 /* We can't get here in this state. */
2907 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
2909 /* So what we know is that the set bits straddle the
2910 middle of the 64-bit word. */
2911 sparc_emit_set_const64_quick2 (op0, temp,
2912 focus_bits, 0,
2913 lowest_bit_set);
2914 return;
2917 /* 1) sethi %hi(high_bits), %reg
2918 * or %reg, %lo(high_bits), %reg
2919 * sllx %reg, 32, %reg
2920 * or %reg, low_bits, %reg
2922 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
2924 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
2925 return;
2928 /* The easiest way when all else fails, is full decomposition. */
2929 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
2932 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
2934 static bool
2935 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
2937 *p1 = SPARC_ICC_REG;
2938 *p2 = SPARC_FCC_REG;
2939 return true;
2942 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
2944 static unsigned int
2945 sparc_min_arithmetic_precision (void)
2947 return 32;
2950 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
2951 return the mode to be used for the comparison. For floating-point,
2952 CCFP[E]mode is used. CCNZmode should be used when the first operand
2953 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
2954 processing is needed. */
2956 machine_mode
2957 select_cc_mode (enum rtx_code op, rtx x, rtx y)
2959 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
2961 switch (op)
2963 case EQ:
2964 case NE:
2965 case UNORDERED:
2966 case ORDERED:
2967 case UNLT:
2968 case UNLE:
2969 case UNGT:
2970 case UNGE:
2971 case UNEQ:
2972 case LTGT:
2973 return CCFPmode;
2975 case LT:
2976 case LE:
2977 case GT:
2978 case GE:
2979 return CCFPEmode;
2981 default:
2982 gcc_unreachable ();
2985 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
2986 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
2987 && y == const0_rtx)
2989 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
2990 return CCXNZmode;
2991 else
2992 return CCNZmode;
2994 else
2996 /* This is for the cmp<mode>_sne pattern. */
2997 if (GET_CODE (x) == NOT && y == constm1_rtx)
2999 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3000 return CCXCmode;
3001 else
3002 return CCCmode;
3005 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3006 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3008 if (GET_CODE (y) == UNSPEC
3009 && (XINT (y, 1) == UNSPEC_ADDV
3010 || XINT (y, 1) == UNSPEC_SUBV
3011 || XINT (y, 1) == UNSPEC_NEGV))
3012 return CCVmode;
3013 else
3014 return CCCmode;
3017 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3018 return CCXmode;
3019 else
3020 return CCmode;
3024 /* Emit the compare insn and return the CC reg for a CODE comparison
3025 with operands X and Y. */
3027 static rtx
3028 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3030 machine_mode mode;
3031 rtx cc_reg;
3033 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3034 return x;
3036 mode = SELECT_CC_MODE (code, x, y);
3038 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3039 fcc regs (cse can't tell they're really call clobbered regs and will
3040 remove a duplicate comparison even if there is an intervening function
3041 call - it will then try to reload the cc reg via an int reg which is why
3042 we need the movcc patterns). It is possible to provide the movcc
3043 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3044 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3045 to tell cse that CCFPE mode registers (even pseudos) are call
3046 clobbered. */
3048 /* ??? This is an experiment. Rather than making changes to cse which may
3049 or may not be easy/clean, we do our own cse. This is possible because
3050 we will generate hard registers. Cse knows they're call clobbered (it
3051 doesn't know the same thing about pseudos). If we guess wrong, no big
3052 deal, but if we win, great! */
3054 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3055 #if 1 /* experiment */
3057 int reg;
3058 /* We cycle through the registers to ensure they're all exercised. */
3059 static int next_fcc_reg = 0;
3060 /* Previous x,y for each fcc reg. */
3061 static rtx prev_args[4][2];
3063 /* Scan prev_args for x,y. */
3064 for (reg = 0; reg < 4; reg++)
3065 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3066 break;
3067 if (reg == 4)
3069 reg = next_fcc_reg;
3070 prev_args[reg][0] = x;
3071 prev_args[reg][1] = y;
3072 next_fcc_reg = (next_fcc_reg + 1) & 3;
3074 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3076 #else
3077 cc_reg = gen_reg_rtx (mode);
3078 #endif /* ! experiment */
3079 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3080 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3081 else
3082 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3084 /* We shouldn't get there for TFmode if !TARGET_HARD_QUAD. If we do, this
3085 will only result in an unrecognizable insn so no point in asserting. */
3086 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3088 return cc_reg;
3092 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3095 gen_compare_reg (rtx cmp)
3097 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3100 /* This function is used for v9 only.
3101 DEST is the target of the Scc insn.
3102 CODE is the code for an Scc's comparison.
3103 X and Y are the values we compare.
3105 This function is needed to turn
3107 (set (reg:SI 110)
3108 (gt (reg:CCX 100 %icc)
3109 (const_int 0)))
3110 into
3111 (set (reg:SI 110)
3112 (gt:DI (reg:CCX 100 %icc)
3113 (const_int 0)))
3115 IE: The instruction recognizer needs to see the mode of the comparison to
3116 find the right instruction. We could use "gt:DI" right in the
3117 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3119 static int
3120 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3122 if (! TARGET_ARCH64
3123 && (GET_MODE (x) == DImode
3124 || GET_MODE (dest) == DImode))
3125 return 0;
3127 /* Try to use the movrCC insns. */
3128 if (TARGET_ARCH64
3129 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3130 && y == const0_rtx
3131 && v9_regcmp_p (compare_code))
3133 rtx op0 = x;
3134 rtx temp;
3136 /* Special case for op0 != 0. This can be done with one instruction if
3137 dest == x. */
3139 if (compare_code == NE
3140 && GET_MODE (dest) == DImode
3141 && rtx_equal_p (op0, dest))
3143 emit_insn (gen_rtx_SET (dest,
3144 gen_rtx_IF_THEN_ELSE (DImode,
3145 gen_rtx_fmt_ee (compare_code, DImode,
3146 op0, const0_rtx),
3147 const1_rtx,
3148 dest)));
3149 return 1;
3152 if (reg_overlap_mentioned_p (dest, op0))
3154 /* Handle the case where dest == x.
3155 We "early clobber" the result. */
3156 op0 = gen_reg_rtx (GET_MODE (x));
3157 emit_move_insn (op0, x);
3160 emit_insn (gen_rtx_SET (dest, const0_rtx));
3161 if (GET_MODE (op0) != DImode)
3163 temp = gen_reg_rtx (DImode);
3164 convert_move (temp, op0, 0);
3166 else
3167 temp = op0;
3168 emit_insn (gen_rtx_SET (dest,
3169 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3170 gen_rtx_fmt_ee (compare_code, DImode,
3171 temp, const0_rtx),
3172 const1_rtx,
3173 dest)));
3174 return 1;
3176 else
3178 x = gen_compare_reg_1 (compare_code, x, y);
3179 y = const0_rtx;
3181 emit_insn (gen_rtx_SET (dest, const0_rtx));
3182 emit_insn (gen_rtx_SET (dest,
3183 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3184 gen_rtx_fmt_ee (compare_code,
3185 GET_MODE (x), x, y),
3186 const1_rtx, dest)));
3187 return 1;
3192 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3193 without jumps using the addx/subx instructions. */
3195 bool
3196 emit_scc_insn (rtx operands[])
3198 rtx tem, x, y;
3199 enum rtx_code code;
3200 machine_mode mode;
3202 /* The quad-word fp compare library routines all return nonzero to indicate
3203 true, which is different from the equivalent libgcc routines, so we must
3204 handle them specially here. */
3205 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3207 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3208 GET_CODE (operands[1]));
3209 operands[2] = XEXP (operands[1], 0);
3210 operands[3] = XEXP (operands[1], 1);
3213 code = GET_CODE (operands[1]);
3214 x = operands[2];
3215 y = operands[3];
3216 mode = GET_MODE (x);
3218 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3219 more applications). The exception to this is "reg != 0" which can
3220 be done in one instruction on v9 (so we do it). */
3221 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3223 if (y != const0_rtx)
3224 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3226 rtx pat = gen_rtx_SET (operands[0],
3227 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3228 x, const0_rtx));
3230 /* If we can use addx/subx or addxc, add a clobber for CC. */
3231 if (mode == SImode || (code == NE && TARGET_VIS3))
3233 rtx clobber
3234 = gen_rtx_CLOBBER (VOIDmode,
3235 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3236 SPARC_ICC_REG));
3237 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3240 emit_insn (pat);
3241 return true;
3244 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3245 if (TARGET_ARCH64
3246 && mode == DImode
3247 && !((code == LTU || code == GTU) && TARGET_VIS3)
3248 && gen_v9_scc (operands[0], code, x, y))
3249 return true;
3251 /* We can do LTU and GEU using the addx/subx instructions too. And
3252 for GTU/LEU, if both operands are registers swap them and fall
3253 back to the easy case. */
3254 if (code == GTU || code == LEU)
3256 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3257 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3259 tem = x;
3260 x = y;
3261 y = tem;
3262 code = swap_condition (code);
3266 if (code == LTU || code == GEU)
3268 emit_insn (gen_rtx_SET (operands[0],
3269 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3270 gen_compare_reg_1 (code, x, y),
3271 const0_rtx)));
3272 return true;
3275 /* All the posibilities to use addx/subx based sequences has been
3276 exhausted, try for a 3 instruction sequence using v9 conditional
3277 moves. */
3278 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3279 return true;
3281 /* Nope, do branches. */
3282 return false;
3285 /* Emit a conditional jump insn for the v9 architecture using comparison code
3286 CODE and jump target LABEL.
3287 This function exists to take advantage of the v9 brxx insns. */
3289 static void
3290 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3292 emit_jump_insn (gen_rtx_SET (pc_rtx,
3293 gen_rtx_IF_THEN_ELSE (VOIDmode,
3294 gen_rtx_fmt_ee (code, GET_MODE (op0),
3295 op0, const0_rtx),
3296 gen_rtx_LABEL_REF (VOIDmode, label),
3297 pc_rtx)));
3300 /* Emit a conditional jump insn for the UA2011 architecture using
3301 comparison code CODE and jump target LABEL. This function exists
3302 to take advantage of the UA2011 Compare and Branch insns. */
3304 static void
3305 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3307 rtx if_then_else;
3309 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3310 gen_rtx_fmt_ee(code, GET_MODE(op0),
3311 op0, op1),
3312 gen_rtx_LABEL_REF (VOIDmode, label),
3313 pc_rtx);
3315 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3318 void
3319 emit_conditional_branch_insn (rtx operands[])
3321 /* The quad-word fp compare library routines all return nonzero to indicate
3322 true, which is different from the equivalent libgcc routines, so we must
3323 handle them specially here. */
3324 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3326 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3327 GET_CODE (operands[0]));
3328 operands[1] = XEXP (operands[0], 0);
3329 operands[2] = XEXP (operands[0], 1);
3332 /* If we can tell early on that the comparison is against a constant
3333 that won't fit in the 5-bit signed immediate field of a cbcond,
3334 use one of the other v9 conditional branch sequences. */
3335 if (TARGET_CBCOND
3336 && GET_CODE (operands[1]) == REG
3337 && (GET_MODE (operands[1]) == SImode
3338 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3339 && (GET_CODE (operands[2]) != CONST_INT
3340 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3342 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3343 return;
3346 if (TARGET_ARCH64 && operands[2] == const0_rtx
3347 && GET_CODE (operands[1]) == REG
3348 && GET_MODE (operands[1]) == DImode)
3350 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3351 return;
3354 operands[1] = gen_compare_reg (operands[0]);
3355 operands[2] = const0_rtx;
3356 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3357 operands[1], operands[2]);
3358 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3359 operands[3]));
3363 /* Generate a DFmode part of a hard TFmode register.
3364 REG is the TFmode hard register, LOW is 1 for the
3365 low 64bit of the register and 0 otherwise.
3368 gen_df_reg (rtx reg, int low)
3370 int regno = REGNO (reg);
3372 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3373 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3374 return gen_rtx_REG (DFmode, regno);
3377 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3378 Unlike normal calls, TFmode operands are passed by reference. It is
3379 assumed that no more than 3 operands are required. */
3381 static void
3382 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3384 rtx ret_slot = NULL, arg[3], func_sym;
3385 int i;
3387 /* We only expect to be called for conversions, unary, and binary ops. */
3388 gcc_assert (nargs == 2 || nargs == 3);
3390 for (i = 0; i < nargs; ++i)
3392 rtx this_arg = operands[i];
3393 rtx this_slot;
3395 /* TFmode arguments and return values are passed by reference. */
3396 if (GET_MODE (this_arg) == TFmode)
3398 int force_stack_temp;
3400 force_stack_temp = 0;
3401 if (TARGET_BUGGY_QP_LIB && i == 0)
3402 force_stack_temp = 1;
3404 if (GET_CODE (this_arg) == MEM
3405 && ! force_stack_temp)
3407 tree expr = MEM_EXPR (this_arg);
3408 if (expr)
3409 mark_addressable (expr);
3410 this_arg = XEXP (this_arg, 0);
3412 else if (CONSTANT_P (this_arg)
3413 && ! force_stack_temp)
3415 this_slot = force_const_mem (TFmode, this_arg);
3416 this_arg = XEXP (this_slot, 0);
3418 else
3420 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3422 /* Operand 0 is the return value. We'll copy it out later. */
3423 if (i > 0)
3424 emit_move_insn (this_slot, this_arg);
3425 else
3426 ret_slot = this_slot;
3428 this_arg = XEXP (this_slot, 0);
3432 arg[i] = this_arg;
3435 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3437 if (GET_MODE (operands[0]) == TFmode)
3439 if (nargs == 2)
3440 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3441 arg[0], GET_MODE (arg[0]),
3442 arg[1], GET_MODE (arg[1]));
3443 else
3444 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3445 arg[0], GET_MODE (arg[0]),
3446 arg[1], GET_MODE (arg[1]),
3447 arg[2], GET_MODE (arg[2]));
3449 if (ret_slot)
3450 emit_move_insn (operands[0], ret_slot);
3452 else
3454 rtx ret;
3456 gcc_assert (nargs == 2);
3458 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3459 GET_MODE (operands[0]),
3460 arg[1], GET_MODE (arg[1]));
3462 if (ret != operands[0])
3463 emit_move_insn (operands[0], ret);
3467 /* Expand soft-float TFmode calls to sparc abi routines. */
3469 static void
3470 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3472 const char *func;
3474 switch (code)
3476 case PLUS:
3477 func = "_Qp_add";
3478 break;
3479 case MINUS:
3480 func = "_Qp_sub";
3481 break;
3482 case MULT:
3483 func = "_Qp_mul";
3484 break;
3485 case DIV:
3486 func = "_Qp_div";
3487 break;
3488 default:
3489 gcc_unreachable ();
3492 emit_soft_tfmode_libcall (func, 3, operands);
3495 static void
3496 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3498 const char *func;
3500 gcc_assert (code == SQRT);
3501 func = "_Qp_sqrt";
3503 emit_soft_tfmode_libcall (func, 2, operands);
3506 static void
3507 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3509 const char *func;
3511 switch (code)
3513 case FLOAT_EXTEND:
3514 switch (GET_MODE (operands[1]))
3516 case E_SFmode:
3517 func = "_Qp_stoq";
3518 break;
3519 case E_DFmode:
3520 func = "_Qp_dtoq";
3521 break;
3522 default:
3523 gcc_unreachable ();
3525 break;
3527 case FLOAT_TRUNCATE:
3528 switch (GET_MODE (operands[0]))
3530 case E_SFmode:
3531 func = "_Qp_qtos";
3532 break;
3533 case E_DFmode:
3534 func = "_Qp_qtod";
3535 break;
3536 default:
3537 gcc_unreachable ();
3539 break;
3541 case FLOAT:
3542 switch (GET_MODE (operands[1]))
3544 case E_SImode:
3545 func = "_Qp_itoq";
3546 if (TARGET_ARCH64)
3547 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3548 break;
3549 case E_DImode:
3550 func = "_Qp_xtoq";
3551 break;
3552 default:
3553 gcc_unreachable ();
3555 break;
3557 case UNSIGNED_FLOAT:
3558 switch (GET_MODE (operands[1]))
3560 case E_SImode:
3561 func = "_Qp_uitoq";
3562 if (TARGET_ARCH64)
3563 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3564 break;
3565 case E_DImode:
3566 func = "_Qp_uxtoq";
3567 break;
3568 default:
3569 gcc_unreachable ();
3571 break;
3573 case FIX:
3574 switch (GET_MODE (operands[0]))
3576 case E_SImode:
3577 func = "_Qp_qtoi";
3578 break;
3579 case E_DImode:
3580 func = "_Qp_qtox";
3581 break;
3582 default:
3583 gcc_unreachable ();
3585 break;
3587 case UNSIGNED_FIX:
3588 switch (GET_MODE (operands[0]))
3590 case E_SImode:
3591 func = "_Qp_qtoui";
3592 break;
3593 case E_DImode:
3594 func = "_Qp_qtoux";
3595 break;
3596 default:
3597 gcc_unreachable ();
3599 break;
3601 default:
3602 gcc_unreachable ();
3605 emit_soft_tfmode_libcall (func, 2, operands);
3608 /* Expand a hard-float tfmode operation. All arguments must be in
3609 registers. */
3611 static void
3612 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3614 rtx op, dest;
3616 if (GET_RTX_CLASS (code) == RTX_UNARY)
3618 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3619 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3621 else
3623 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3624 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3625 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3626 operands[1], operands[2]);
3629 if (register_operand (operands[0], VOIDmode))
3630 dest = operands[0];
3631 else
3632 dest = gen_reg_rtx (GET_MODE (operands[0]));
3634 emit_insn (gen_rtx_SET (dest, op));
3636 if (dest != operands[0])
3637 emit_move_insn (operands[0], dest);
3640 void
3641 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3643 if (TARGET_HARD_QUAD)
3644 emit_hard_tfmode_operation (code, operands);
3645 else
3646 emit_soft_tfmode_binop (code, operands);
3649 void
3650 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3652 if (TARGET_HARD_QUAD)
3653 emit_hard_tfmode_operation (code, operands);
3654 else
3655 emit_soft_tfmode_unop (code, operands);
3658 void
3659 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3661 if (TARGET_HARD_QUAD)
3662 emit_hard_tfmode_operation (code, operands);
3663 else
3664 emit_soft_tfmode_cvt (code, operands);
3667 /* Return nonzero if a branch/jump/call instruction will be emitting
3668 nop into its delay slot. */
3671 empty_delay_slot (rtx_insn *insn)
3673 rtx seq;
3675 /* If no previous instruction (should not happen), return true. */
3676 if (PREV_INSN (insn) == NULL)
3677 return 1;
3679 seq = NEXT_INSN (PREV_INSN (insn));
3680 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3681 return 0;
3683 return 1;
3686 /* Return nonzero if we should emit a nop after a cbcond instruction.
3687 The cbcond instruction does not have a delay slot, however there is
3688 a severe performance penalty if a control transfer appears right
3689 after a cbcond. Therefore we emit a nop when we detect this
3690 situation. */
3693 emit_cbcond_nop (rtx_insn *insn)
3695 rtx next = next_active_insn (insn);
3697 if (!next)
3698 return 1;
3700 if (NONJUMP_INSN_P (next)
3701 && GET_CODE (PATTERN (next)) == SEQUENCE)
3702 next = XVECEXP (PATTERN (next), 0, 0);
3703 else if (CALL_P (next)
3704 && GET_CODE (PATTERN (next)) == PARALLEL)
3706 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3708 if (GET_CODE (delay) == RETURN)
3710 /* It's a sibling call. Do not emit the nop if we're going
3711 to emit something other than the jump itself as the first
3712 instruction of the sibcall sequence. */
3713 if (sparc_leaf_function_p || TARGET_FLAT)
3714 return 0;
3718 if (NONJUMP_INSN_P (next))
3719 return 0;
3721 return 1;
3724 /* Return nonzero if TRIAL can go into the call delay slot. */
3727 eligible_for_call_delay (rtx_insn *trial)
3729 rtx pat;
3731 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3732 return 0;
3734 /* Binutils allows
3735 call __tls_get_addr, %tgd_call (foo)
3736 add %l7, %o0, %o0, %tgd_add (foo)
3737 while Sun as/ld does not. */
3738 if (TARGET_GNU_TLS || !TARGET_TLS)
3739 return 1;
3741 pat = PATTERN (trial);
3743 /* We must reject tgd_add{32|64}, i.e.
3744 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3745 and tldm_add{32|64}, i.e.
3746 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3747 for Sun as/ld. */
3748 if (GET_CODE (pat) == SET
3749 && GET_CODE (SET_SRC (pat)) == PLUS)
3751 rtx unspec = XEXP (SET_SRC (pat), 1);
3753 if (GET_CODE (unspec) == UNSPEC
3754 && (XINT (unspec, 1) == UNSPEC_TLSGD
3755 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3756 return 0;
3759 return 1;
3762 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3763 instruction. RETURN_P is true if the v9 variant 'return' is to be
3764 considered in the test too.
3766 TRIAL must be a SET whose destination is a REG appropriate for the
3767 'restore' instruction or, if RETURN_P is true, for the 'return'
3768 instruction. */
3770 static int
3771 eligible_for_restore_insn (rtx trial, bool return_p)
3773 rtx pat = PATTERN (trial);
3774 rtx src = SET_SRC (pat);
3775 bool src_is_freg = false;
3776 rtx src_reg;
3778 /* Since we now can do moves between float and integer registers when
3779 VIS3 is enabled, we have to catch this case. We can allow such
3780 moves when doing a 'return' however. */
3781 src_reg = src;
3782 if (GET_CODE (src_reg) == SUBREG)
3783 src_reg = SUBREG_REG (src_reg);
3784 if (GET_CODE (src_reg) == REG
3785 && SPARC_FP_REG_P (REGNO (src_reg)))
3786 src_is_freg = true;
3788 /* The 'restore src,%g0,dest' pattern for word mode and below. */
3789 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3790 && arith_operand (src, GET_MODE (src))
3791 && ! src_is_freg)
3793 if (TARGET_ARCH64)
3794 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3795 else
3796 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
3799 /* The 'restore src,%g0,dest' pattern for double-word mode. */
3800 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
3801 && arith_double_operand (src, GET_MODE (src))
3802 && ! src_is_freg)
3803 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
3805 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
3806 else if (! TARGET_FPU && register_operand (src, SFmode))
3807 return 1;
3809 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
3810 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
3811 return 1;
3813 /* If we have the 'return' instruction, anything that does not use
3814 local or output registers and can go into a delay slot wins. */
3815 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
3816 return 1;
3818 /* The 'restore src1,src2,dest' pattern for SImode. */
3819 else if (GET_CODE (src) == PLUS
3820 && register_operand (XEXP (src, 0), SImode)
3821 && arith_operand (XEXP (src, 1), SImode))
3822 return 1;
3824 /* The 'restore src1,src2,dest' pattern for DImode. */
3825 else if (GET_CODE (src) == PLUS
3826 && register_operand (XEXP (src, 0), DImode)
3827 && arith_double_operand (XEXP (src, 1), DImode))
3828 return 1;
3830 /* The 'restore src1,%lo(src2),dest' pattern. */
3831 else if (GET_CODE (src) == LO_SUM
3832 && ! TARGET_CM_MEDMID
3833 && ((register_operand (XEXP (src, 0), SImode)
3834 && immediate_operand (XEXP (src, 1), SImode))
3835 || (TARGET_ARCH64
3836 && register_operand (XEXP (src, 0), DImode)
3837 && immediate_operand (XEXP (src, 1), DImode))))
3838 return 1;
3840 /* The 'restore src,src,dest' pattern. */
3841 else if (GET_CODE (src) == ASHIFT
3842 && (register_operand (XEXP (src, 0), SImode)
3843 || register_operand (XEXP (src, 0), DImode))
3844 && XEXP (src, 1) == const1_rtx)
3845 return 1;
3847 return 0;
3850 /* Return nonzero if TRIAL can go into the function return's delay slot. */
3853 eligible_for_return_delay (rtx_insn *trial)
3855 int regno;
3856 rtx pat;
3858 /* If the function uses __builtin_eh_return, the eh_return machinery
3859 occupies the delay slot. */
3860 if (crtl->calls_eh_return)
3861 return 0;
3863 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3864 return 0;
3866 /* In the case of a leaf or flat function, anything can go into the slot. */
3867 if (sparc_leaf_function_p || TARGET_FLAT)
3868 return 1;
3870 if (!NONJUMP_INSN_P (trial))
3871 return 0;
3873 pat = PATTERN (trial);
3874 if (GET_CODE (pat) == PARALLEL)
3876 int i;
3878 if (! TARGET_V9)
3879 return 0;
3880 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
3882 rtx expr = XVECEXP (pat, 0, i);
3883 if (GET_CODE (expr) != SET)
3884 return 0;
3885 if (GET_CODE (SET_DEST (expr)) != REG)
3886 return 0;
3887 regno = REGNO (SET_DEST (expr));
3888 if (regno >= 8 && regno < 24)
3889 return 0;
3891 return !epilogue_renumber (&pat, 1);
3894 if (GET_CODE (pat) != SET)
3895 return 0;
3897 if (GET_CODE (SET_DEST (pat)) != REG)
3898 return 0;
3900 regno = REGNO (SET_DEST (pat));
3902 /* Otherwise, only operations which can be done in tandem with
3903 a `restore' or `return' insn can go into the delay slot. */
3904 if (regno >= 8 && regno < 24)
3905 return 0;
3907 /* If this instruction sets up floating point register and we have a return
3908 instruction, it can probably go in. But restore will not work
3909 with FP_REGS. */
3910 if (! SPARC_INT_REG_P (regno))
3911 return TARGET_V9 && !epilogue_renumber (&pat, 1);
3913 return eligible_for_restore_insn (trial, true);
3916 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
3919 eligible_for_sibcall_delay (rtx_insn *trial)
3921 rtx pat;
3923 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3924 return 0;
3926 if (!NONJUMP_INSN_P (trial))
3927 return 0;
3929 pat = PATTERN (trial);
3931 if (sparc_leaf_function_p || TARGET_FLAT)
3933 /* If the tail call is done using the call instruction,
3934 we have to restore %o7 in the delay slot. */
3935 if (LEAF_SIBCALL_SLOT_RESERVED_P)
3936 return 0;
3938 /* %g1 is used to build the function address */
3939 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
3940 return 0;
3942 return 1;
3945 if (GET_CODE (pat) != SET)
3946 return 0;
3948 /* Otherwise, only operations which can be done in tandem with
3949 a `restore' insn can go into the delay slot. */
3950 if (GET_CODE (SET_DEST (pat)) != REG
3951 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
3952 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
3953 return 0;
3955 /* If it mentions %o7, it can't go in, because sibcall will clobber it
3956 in most cases. */
3957 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
3958 return 0;
3960 return eligible_for_restore_insn (trial, false);
3963 /* Determine if it's legal to put X into the constant pool. This
3964 is not possible if X contains the address of a symbol that is
3965 not constant (TLS) or not known at final link time (PIC). */
3967 static bool
3968 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
3970 switch (GET_CODE (x))
3972 case CONST_INT:
3973 case CONST_WIDE_INT:
3974 case CONST_DOUBLE:
3975 case CONST_VECTOR:
3976 /* Accept all non-symbolic constants. */
3977 return false;
3979 case LABEL_REF:
3980 /* Labels are OK iff we are non-PIC. */
3981 return flag_pic != 0;
3983 case SYMBOL_REF:
3984 /* 'Naked' TLS symbol references are never OK,
3985 non-TLS symbols are OK iff we are non-PIC. */
3986 if (SYMBOL_REF_TLS_MODEL (x))
3987 return true;
3988 else
3989 return flag_pic != 0;
3991 case CONST:
3992 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
3993 case PLUS:
3994 case MINUS:
3995 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
3996 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
3997 case UNSPEC:
3998 return true;
3999 default:
4000 gcc_unreachable ();
4004 /* Global Offset Table support. */
4005 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4006 static GTY(()) rtx global_offset_table_rtx = NULL_RTX;
4008 /* Return the SYMBOL_REF for the Global Offset Table. */
4010 static GTY(()) rtx sparc_got_symbol = NULL_RTX;
4012 static rtx
4013 sparc_got (void)
4015 if (!sparc_got_symbol)
4016 sparc_got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4018 return sparc_got_symbol;
4021 /* Ensure that we are not using patterns that are not OK with PIC. */
4024 check_pic (int i)
4026 rtx op;
4028 switch (flag_pic)
4030 case 1:
4031 op = recog_data.operand[i];
4032 gcc_assert (GET_CODE (op) != SYMBOL_REF
4033 && (GET_CODE (op) != CONST
4034 || (GET_CODE (XEXP (op, 0)) == MINUS
4035 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4036 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4037 /* fallthrough */
4038 case 2:
4039 default:
4040 return 1;
4044 /* Return true if X is an address which needs a temporary register when
4045 reloaded while generating PIC code. */
4048 pic_address_needs_scratch (rtx x)
4050 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4051 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
4052 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4053 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4054 && ! SMALL_INT (XEXP (XEXP (x, 0), 1)))
4055 return 1;
4057 return 0;
4060 /* Determine if a given RTX is a valid constant. We already know this
4061 satisfies CONSTANT_P. */
4063 static bool
4064 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4066 switch (GET_CODE (x))
4068 case CONST:
4069 case SYMBOL_REF:
4070 if (sparc_tls_referenced_p (x))
4071 return false;
4072 break;
4074 case CONST_DOUBLE:
4075 /* Floating point constants are generally not ok.
4076 The only exception is 0.0 and all-ones in VIS. */
4077 if (TARGET_VIS
4078 && SCALAR_FLOAT_MODE_P (mode)
4079 && (const_zero_operand (x, mode)
4080 || const_all_ones_operand (x, mode)))
4081 return true;
4083 return false;
4085 case CONST_VECTOR:
4086 /* Vector constants are generally not ok.
4087 The only exception is 0 or -1 in VIS. */
4088 if (TARGET_VIS
4089 && (const_zero_operand (x, mode)
4090 || const_all_ones_operand (x, mode)))
4091 return true;
4093 return false;
4095 default:
4096 break;
4099 return true;
4102 /* Determine if a given RTX is a valid constant address. */
4104 bool
4105 constant_address_p (rtx x)
4107 switch (GET_CODE (x))
4109 case LABEL_REF:
4110 case CONST_INT:
4111 case HIGH:
4112 return true;
4114 case CONST:
4115 if (flag_pic && pic_address_needs_scratch (x))
4116 return false;
4117 return sparc_legitimate_constant_p (Pmode, x);
4119 case SYMBOL_REF:
4120 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4122 default:
4123 return false;
4127 /* Nonzero if the constant value X is a legitimate general operand
4128 when generating PIC code. It is given that flag_pic is on and
4129 that X satisfies CONSTANT_P. */
4131 bool
4132 legitimate_pic_operand_p (rtx x)
4134 if (pic_address_needs_scratch (x))
4135 return false;
4136 if (sparc_tls_referenced_p (x))
4137 return false;
4138 return true;
4141 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4142 (CONST_INT_P (X) \
4143 && INTVAL (X) >= -0x1000 \
4144 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4146 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4147 (CONST_INT_P (X) \
4148 && INTVAL (X) >= -0x1000 \
4149 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
4151 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4153 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4154 ordinarily. This changes a bit when generating PIC. */
4156 static bool
4157 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4159 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4161 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4162 rs1 = addr;
4163 else if (GET_CODE (addr) == PLUS)
4165 rs1 = XEXP (addr, 0);
4166 rs2 = XEXP (addr, 1);
4168 /* Canonicalize. REG comes first, if there are no regs,
4169 LO_SUM comes first. */
4170 if (!REG_P (rs1)
4171 && GET_CODE (rs1) != SUBREG
4172 && (REG_P (rs2)
4173 || GET_CODE (rs2) == SUBREG
4174 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4176 rs1 = XEXP (addr, 1);
4177 rs2 = XEXP (addr, 0);
4180 if ((flag_pic == 1
4181 && rs1 == pic_offset_table_rtx
4182 && !REG_P (rs2)
4183 && GET_CODE (rs2) != SUBREG
4184 && GET_CODE (rs2) != LO_SUM
4185 && GET_CODE (rs2) != MEM
4186 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4187 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4188 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4189 || ((REG_P (rs1)
4190 || GET_CODE (rs1) == SUBREG)
4191 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4193 imm1 = rs2;
4194 rs2 = NULL;
4196 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4197 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4199 /* We prohibit REG + REG for TFmode when there are no quad move insns
4200 and we consequently need to split. We do this because REG+REG
4201 is not an offsettable address. If we get the situation in reload
4202 where source and destination of a movtf pattern are both MEMs with
4203 REG+REG address, then only one of them gets converted to an
4204 offsettable address. */
4205 if (mode == TFmode
4206 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4207 return 0;
4209 /* Likewise for TImode, but in all cases. */
4210 if (mode == TImode)
4211 return 0;
4213 /* We prohibit REG + REG on ARCH32 if not optimizing for
4214 DFmode/DImode because then mem_min_alignment is likely to be zero
4215 after reload and the forced split would lack a matching splitter
4216 pattern. */
4217 if (TARGET_ARCH32 && !optimize
4218 && (mode == DFmode || mode == DImode))
4219 return 0;
4221 else if (USE_AS_OFFSETABLE_LO10
4222 && GET_CODE (rs1) == LO_SUM
4223 && TARGET_ARCH64
4224 && ! TARGET_CM_MEDMID
4225 && RTX_OK_FOR_OLO10_P (rs2, mode))
4227 rs2 = NULL;
4228 imm1 = XEXP (rs1, 1);
4229 rs1 = XEXP (rs1, 0);
4230 if (!CONSTANT_P (imm1)
4231 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4232 return 0;
4235 else if (GET_CODE (addr) == LO_SUM)
4237 rs1 = XEXP (addr, 0);
4238 imm1 = XEXP (addr, 1);
4240 if (!CONSTANT_P (imm1)
4241 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4242 return 0;
4244 /* We can't allow TFmode in 32-bit mode, because an offset greater
4245 than the alignment (8) may cause the LO_SUM to overflow. */
4246 if (mode == TFmode && TARGET_ARCH32)
4247 return 0;
4249 /* During reload, accept the HIGH+LO_SUM construct generated by
4250 sparc_legitimize_reload_address. */
4251 if (reload_in_progress
4252 && GET_CODE (rs1) == HIGH
4253 && XEXP (rs1, 0) == imm1)
4254 return 1;
4256 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4257 return 1;
4258 else
4259 return 0;
4261 if (GET_CODE (rs1) == SUBREG)
4262 rs1 = SUBREG_REG (rs1);
4263 if (!REG_P (rs1))
4264 return 0;
4266 if (rs2)
4268 if (GET_CODE (rs2) == SUBREG)
4269 rs2 = SUBREG_REG (rs2);
4270 if (!REG_P (rs2))
4271 return 0;
4274 if (strict)
4276 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4277 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4278 return 0;
4280 else
4282 if ((! SPARC_INT_REG_P (REGNO (rs1))
4283 && REGNO (rs1) != FRAME_POINTER_REGNUM
4284 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4285 || (rs2
4286 && (! SPARC_INT_REG_P (REGNO (rs2))
4287 && REGNO (rs2) != FRAME_POINTER_REGNUM
4288 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4289 return 0;
4291 return 1;
4294 /* Return the SYMBOL_REF for the tls_get_addr function. */
4296 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4298 static rtx
4299 sparc_tls_get_addr (void)
4301 if (!sparc_tls_symbol)
4302 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4304 return sparc_tls_symbol;
4307 /* Return the Global Offset Table to be used in TLS mode. */
4309 static rtx
4310 sparc_tls_got (void)
4312 /* In PIC mode, this is just the PIC offset table. */
4313 if (flag_pic)
4315 crtl->uses_pic_offset_table = 1;
4316 return pic_offset_table_rtx;
4319 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4320 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4321 if (TARGET_SUN_TLS && TARGET_ARCH32)
4323 load_got_register ();
4324 return global_offset_table_rtx;
4327 /* In all other cases, we load a new pseudo with the GOT symbol. */
4328 return copy_to_reg (sparc_got ());
4331 /* Return true if X contains a thread-local symbol. */
4333 static bool
4334 sparc_tls_referenced_p (rtx x)
4336 if (!TARGET_HAVE_TLS)
4337 return false;
4339 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4340 x = XEXP (XEXP (x, 0), 0);
4342 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4343 return true;
4345 /* That's all we handle in sparc_legitimize_tls_address for now. */
4346 return false;
4349 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4350 this (thread-local) address. */
4352 static rtx
4353 sparc_legitimize_tls_address (rtx addr)
4355 rtx temp1, temp2, temp3, ret, o0, got;
4356 rtx_insn *insn;
4358 gcc_assert (can_create_pseudo_p ());
4360 if (GET_CODE (addr) == SYMBOL_REF)
4361 switch (SYMBOL_REF_TLS_MODEL (addr))
4363 case TLS_MODEL_GLOBAL_DYNAMIC:
4364 start_sequence ();
4365 temp1 = gen_reg_rtx (SImode);
4366 temp2 = gen_reg_rtx (SImode);
4367 ret = gen_reg_rtx (Pmode);
4368 o0 = gen_rtx_REG (Pmode, 8);
4369 got = sparc_tls_got ();
4370 emit_insn (gen_tgd_hi22 (temp1, addr));
4371 emit_insn (gen_tgd_lo10 (temp2, temp1, addr));
4372 if (TARGET_ARCH32)
4374 emit_insn (gen_tgd_add32 (o0, got, temp2, addr));
4375 insn = emit_call_insn (gen_tgd_call32 (o0, sparc_tls_get_addr (),
4376 addr, const1_rtx));
4378 else
4380 emit_insn (gen_tgd_add64 (o0, got, temp2, addr));
4381 insn = emit_call_insn (gen_tgd_call64 (o0, sparc_tls_get_addr (),
4382 addr, const1_rtx));
4384 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4385 insn = get_insns ();
4386 end_sequence ();
4387 emit_libcall_block (insn, ret, o0, addr);
4388 break;
4390 case TLS_MODEL_LOCAL_DYNAMIC:
4391 start_sequence ();
4392 temp1 = gen_reg_rtx (SImode);
4393 temp2 = gen_reg_rtx (SImode);
4394 temp3 = gen_reg_rtx (Pmode);
4395 ret = gen_reg_rtx (Pmode);
4396 o0 = gen_rtx_REG (Pmode, 8);
4397 got = sparc_tls_got ();
4398 emit_insn (gen_tldm_hi22 (temp1));
4399 emit_insn (gen_tldm_lo10 (temp2, temp1));
4400 if (TARGET_ARCH32)
4402 emit_insn (gen_tldm_add32 (o0, got, temp2));
4403 insn = emit_call_insn (gen_tldm_call32 (o0, sparc_tls_get_addr (),
4404 const1_rtx));
4406 else
4408 emit_insn (gen_tldm_add64 (o0, got, temp2));
4409 insn = emit_call_insn (gen_tldm_call64 (o0, sparc_tls_get_addr (),
4410 const1_rtx));
4412 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4413 insn = get_insns ();
4414 end_sequence ();
4415 emit_libcall_block (insn, temp3, o0,
4416 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4417 UNSPEC_TLSLD_BASE));
4418 temp1 = gen_reg_rtx (SImode);
4419 temp2 = gen_reg_rtx (SImode);
4420 emit_insn (gen_tldo_hix22 (temp1, addr));
4421 emit_insn (gen_tldo_lox10 (temp2, temp1, addr));
4422 if (TARGET_ARCH32)
4423 emit_insn (gen_tldo_add32 (ret, temp3, temp2, addr));
4424 else
4425 emit_insn (gen_tldo_add64 (ret, temp3, temp2, addr));
4426 break;
4428 case TLS_MODEL_INITIAL_EXEC:
4429 temp1 = gen_reg_rtx (SImode);
4430 temp2 = gen_reg_rtx (SImode);
4431 temp3 = gen_reg_rtx (Pmode);
4432 got = sparc_tls_got ();
4433 emit_insn (gen_tie_hi22 (temp1, addr));
4434 emit_insn (gen_tie_lo10 (temp2, temp1, addr));
4435 if (TARGET_ARCH32)
4436 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4437 else
4438 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4439 if (TARGET_SUN_TLS)
4441 ret = gen_reg_rtx (Pmode);
4442 if (TARGET_ARCH32)
4443 emit_insn (gen_tie_add32 (ret, gen_rtx_REG (Pmode, 7),
4444 temp3, addr));
4445 else
4446 emit_insn (gen_tie_add64 (ret, gen_rtx_REG (Pmode, 7),
4447 temp3, addr));
4449 else
4450 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4451 break;
4453 case TLS_MODEL_LOCAL_EXEC:
4454 temp1 = gen_reg_rtx (Pmode);
4455 temp2 = gen_reg_rtx (Pmode);
4456 if (TARGET_ARCH32)
4458 emit_insn (gen_tle_hix22_sp32 (temp1, addr));
4459 emit_insn (gen_tle_lox10_sp32 (temp2, temp1, addr));
4461 else
4463 emit_insn (gen_tle_hix22_sp64 (temp1, addr));
4464 emit_insn (gen_tle_lox10_sp64 (temp2, temp1, addr));
4466 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4467 break;
4469 default:
4470 gcc_unreachable ();
4473 else if (GET_CODE (addr) == CONST)
4475 rtx base, offset;
4477 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4479 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4480 offset = XEXP (XEXP (addr, 0), 1);
4482 base = force_operand (base, NULL_RTX);
4483 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4484 offset = force_reg (Pmode, offset);
4485 ret = gen_rtx_PLUS (Pmode, base, offset);
4488 else
4489 gcc_unreachable (); /* for now ... */
4491 return ret;
4494 /* Legitimize PIC addresses. If the address is already position-independent,
4495 we return ORIG. Newly generated position-independent addresses go into a
4496 reg. This is REG if nonzero, otherwise we allocate register(s) as
4497 necessary. */
4499 static rtx
4500 sparc_legitimize_pic_address (rtx orig, rtx reg)
4502 bool gotdata_op = false;
4504 if (GET_CODE (orig) == SYMBOL_REF
4505 /* See the comment in sparc_expand_move. */
4506 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4508 rtx pic_ref, address;
4509 rtx_insn *insn;
4511 if (reg == 0)
4513 gcc_assert (can_create_pseudo_p ());
4514 reg = gen_reg_rtx (Pmode);
4517 if (flag_pic == 2)
4519 /* If not during reload, allocate another temp reg here for loading
4520 in the address, so that these instructions can be optimized
4521 properly. */
4522 rtx temp_reg = (! can_create_pseudo_p ()
4523 ? reg : gen_reg_rtx (Pmode));
4525 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4526 won't get confused into thinking that these two instructions
4527 are loading in the true address of the symbol. If in the
4528 future a PIC rtx exists, that should be used instead. */
4529 if (TARGET_ARCH64)
4531 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4532 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4534 else
4536 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4537 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4539 address = temp_reg;
4540 gotdata_op = true;
4542 else
4543 address = orig;
4545 crtl->uses_pic_offset_table = 1;
4546 if (gotdata_op)
4548 if (TARGET_ARCH64)
4549 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4550 pic_offset_table_rtx,
4551 address, orig));
4552 else
4553 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4554 pic_offset_table_rtx,
4555 address, orig));
4557 else
4559 pic_ref
4560 = gen_const_mem (Pmode,
4561 gen_rtx_PLUS (Pmode,
4562 pic_offset_table_rtx, address));
4563 insn = emit_move_insn (reg, pic_ref);
4566 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4567 by loop. */
4568 set_unique_reg_note (insn, REG_EQUAL, orig);
4569 return reg;
4571 else if (GET_CODE (orig) == CONST)
4573 rtx base, offset;
4575 if (GET_CODE (XEXP (orig, 0)) == PLUS
4576 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
4577 return orig;
4579 if (reg == 0)
4581 gcc_assert (can_create_pseudo_p ());
4582 reg = gen_reg_rtx (Pmode);
4585 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4586 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4587 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4588 base == reg ? NULL_RTX : reg);
4590 if (GET_CODE (offset) == CONST_INT)
4592 if (SMALL_INT (offset))
4593 return plus_constant (Pmode, base, INTVAL (offset));
4594 else if (can_create_pseudo_p ())
4595 offset = force_reg (Pmode, offset);
4596 else
4597 /* If we reach here, then something is seriously wrong. */
4598 gcc_unreachable ();
4600 return gen_rtx_PLUS (Pmode, base, offset);
4602 else if (GET_CODE (orig) == LABEL_REF)
4603 /* ??? We ought to be checking that the register is live instead, in case
4604 it is eliminated. */
4605 crtl->uses_pic_offset_table = 1;
4607 return orig;
4610 /* Try machine-dependent ways of modifying an illegitimate address X
4611 to be legitimate. If we find one, return the new, valid address.
4613 OLDX is the address as it was before break_out_memory_refs was called.
4614 In some cases it is useful to look at this to decide what needs to be done.
4616 MODE is the mode of the operand pointed to by X.
4618 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4620 static rtx
4621 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4622 machine_mode mode)
4624 rtx orig_x = x;
4626 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4627 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4628 force_operand (XEXP (x, 0), NULL_RTX));
4629 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4630 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4631 force_operand (XEXP (x, 1), NULL_RTX));
4632 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4633 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4634 XEXP (x, 1));
4635 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4636 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4637 force_operand (XEXP (x, 1), NULL_RTX));
4639 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4640 return x;
4642 if (sparc_tls_referenced_p (x))
4643 x = sparc_legitimize_tls_address (x);
4644 else if (flag_pic)
4645 x = sparc_legitimize_pic_address (x, NULL_RTX);
4646 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4647 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4648 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4649 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4650 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4651 copy_to_mode_reg (Pmode, XEXP (x, 0)));
4652 else if (GET_CODE (x) == SYMBOL_REF
4653 || GET_CODE (x) == CONST
4654 || GET_CODE (x) == LABEL_REF)
4655 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
4657 return x;
4660 /* Delegitimize an address that was legitimized by the above function. */
4662 static rtx
4663 sparc_delegitimize_address (rtx x)
4665 x = delegitimize_mem_from_attrs (x);
4667 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 1)) == UNSPEC)
4668 switch (XINT (XEXP (x, 1), 1))
4670 case UNSPEC_MOVE_PIC:
4671 case UNSPEC_TLSLE:
4672 x = XVECEXP (XEXP (x, 1), 0, 0);
4673 gcc_assert (GET_CODE (x) == SYMBOL_REF);
4674 break;
4675 default:
4676 break;
4679 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
4680 if (GET_CODE (x) == MINUS
4681 && REG_P (XEXP (x, 0))
4682 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
4683 && GET_CODE (XEXP (x, 1)) == LO_SUM
4684 && GET_CODE (XEXP (XEXP (x, 1), 1)) == UNSPEC
4685 && XINT (XEXP (XEXP (x, 1), 1), 1) == UNSPEC_MOVE_PIC_LABEL)
4687 x = XVECEXP (XEXP (XEXP (x, 1), 1), 0, 0);
4688 gcc_assert (GET_CODE (x) == LABEL_REF);
4691 return x;
4694 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
4695 replace the input X, or the original X if no replacement is called for.
4696 The output parameter *WIN is 1 if the calling macro should goto WIN,
4697 0 if it should not.
4699 For SPARC, we wish to handle addresses by splitting them into
4700 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
4701 This cuts the number of extra insns by one.
4703 Do nothing when generating PIC code and the address is a symbolic
4704 operand or requires a scratch register. */
4707 sparc_legitimize_reload_address (rtx x, machine_mode mode,
4708 int opnum, int type,
4709 int ind_levels ATTRIBUTE_UNUSED, int *win)
4711 /* Decompose SImode constants into HIGH+LO_SUM. */
4712 if (CONSTANT_P (x)
4713 && (mode != TFmode || TARGET_ARCH64)
4714 && GET_MODE (x) == SImode
4715 && GET_CODE (x) != LO_SUM
4716 && GET_CODE (x) != HIGH
4717 && sparc_cmodel <= CM_MEDLOW
4718 && !(flag_pic
4719 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
4721 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
4722 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4723 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4724 opnum, (enum reload_type)type);
4725 *win = 1;
4726 return x;
4729 /* We have to recognize what we have already generated above. */
4730 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
4732 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
4733 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
4734 opnum, (enum reload_type)type);
4735 *win = 1;
4736 return x;
4739 *win = 0;
4740 return x;
4743 /* Return true if ADDR (a legitimate address expression)
4744 has an effect that depends on the machine mode it is used for.
4746 In PIC mode,
4748 (mem:HI [%l7+a])
4750 is not equivalent to
4752 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
4754 because [%l7+a+1] is interpreted as the address of (a+1). */
4757 static bool
4758 sparc_mode_dependent_address_p (const_rtx addr,
4759 addr_space_t as ATTRIBUTE_UNUSED)
4761 if (flag_pic && GET_CODE (addr) == PLUS)
4763 rtx op0 = XEXP (addr, 0);
4764 rtx op1 = XEXP (addr, 1);
4765 if (op0 == pic_offset_table_rtx
4766 && symbolic_operand (op1, VOIDmode))
4767 return true;
4770 return false;
4773 #ifdef HAVE_GAS_HIDDEN
4774 # define USE_HIDDEN_LINKONCE 1
4775 #else
4776 # define USE_HIDDEN_LINKONCE 0
4777 #endif
4779 static void
4780 get_pc_thunk_name (char name[32], unsigned int regno)
4782 const char *reg_name = reg_names[regno];
4784 /* Skip the leading '%' as that cannot be used in a
4785 symbol name. */
4786 reg_name += 1;
4788 if (USE_HIDDEN_LINKONCE)
4789 sprintf (name, "__sparc_get_pc_thunk.%s", reg_name);
4790 else
4791 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC", regno);
4794 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4796 static rtx
4797 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2, rtx op3)
4799 int orig_flag_pic = flag_pic;
4800 rtx insn;
4802 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4803 flag_pic = 0;
4804 if (TARGET_ARCH64)
4805 insn = gen_load_pcrel_symdi (op0, op1, op2, op3);
4806 else
4807 insn = gen_load_pcrel_symsi (op0, op1, op2, op3);
4808 flag_pic = orig_flag_pic;
4810 return insn;
4813 /* Emit code to load the GOT register. */
4815 void
4816 load_got_register (void)
4818 /* In PIC mode, this will retrieve pic_offset_table_rtx. */
4819 if (!global_offset_table_rtx)
4820 global_offset_table_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4822 if (TARGET_VXWORKS_RTP)
4823 emit_insn (gen_vxworks_load_got ());
4824 else
4826 /* The GOT symbol is subject to a PC-relative relocation so we need a
4827 helper function to add the PC value and thus get the final value. */
4828 if (!got_helper_rtx)
4830 char name[32];
4831 get_pc_thunk_name (name, GLOBAL_OFFSET_TABLE_REGNUM);
4832 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4835 emit_insn (gen_load_pcrel_sym (global_offset_table_rtx, sparc_got (),
4836 got_helper_rtx,
4837 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM)));
4840 /* Need to emit this whether or not we obey regdecls,
4841 since setjmp/longjmp can cause life info to screw up.
4842 ??? In the case where we don't obey regdecls, this is not sufficient
4843 since we may not fall out the bottom. */
4844 emit_use (global_offset_table_rtx);
4847 /* Emit a call instruction with the pattern given by PAT. ADDR is the
4848 address of the call target. */
4850 void
4851 sparc_emit_call_insn (rtx pat, rtx addr)
4853 rtx_insn *insn;
4855 insn = emit_call_insn (pat);
4857 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
4858 if (TARGET_VXWORKS_RTP
4859 && flag_pic
4860 && GET_CODE (addr) == SYMBOL_REF
4861 && (SYMBOL_REF_DECL (addr)
4862 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
4863 : !SYMBOL_REF_LOCAL_P (addr)))
4865 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
4866 crtl->uses_pic_offset_table = 1;
4870 /* Return 1 if RTX is a MEM which is known to be aligned to at
4871 least a DESIRED byte boundary. */
4874 mem_min_alignment (rtx mem, int desired)
4876 rtx addr, base, offset;
4878 /* If it's not a MEM we can't accept it. */
4879 if (GET_CODE (mem) != MEM)
4880 return 0;
4882 /* Obviously... */
4883 if (!TARGET_UNALIGNED_DOUBLES
4884 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
4885 return 1;
4887 /* ??? The rest of the function predates MEM_ALIGN so
4888 there is probably a bit of redundancy. */
4889 addr = XEXP (mem, 0);
4890 base = offset = NULL_RTX;
4891 if (GET_CODE (addr) == PLUS)
4893 if (GET_CODE (XEXP (addr, 0)) == REG)
4895 base = XEXP (addr, 0);
4897 /* What we are saying here is that if the base
4898 REG is aligned properly, the compiler will make
4899 sure any REG based index upon it will be so
4900 as well. */
4901 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
4902 offset = XEXP (addr, 1);
4903 else
4904 offset = const0_rtx;
4907 else if (GET_CODE (addr) == REG)
4909 base = addr;
4910 offset = const0_rtx;
4913 if (base != NULL_RTX)
4915 int regno = REGNO (base);
4917 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
4919 /* Check if the compiler has recorded some information
4920 about the alignment of the base REG. If reload has
4921 completed, we already matched with proper alignments.
4922 If not running global_alloc, reload might give us
4923 unaligned pointer to local stack though. */
4924 if (((cfun != 0
4925 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
4926 || (optimize && reload_completed))
4927 && (INTVAL (offset) & (desired - 1)) == 0)
4928 return 1;
4930 else
4932 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
4933 return 1;
4936 else if (! TARGET_UNALIGNED_DOUBLES
4937 || CONSTANT_P (addr)
4938 || GET_CODE (addr) == LO_SUM)
4940 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
4941 is true, in which case we can only assume that an access is aligned if
4942 it is to a constant address, or the address involves a LO_SUM. */
4943 return 1;
4946 /* An obviously unaligned address. */
4947 return 0;
4951 /* Vectors to keep interesting information about registers where it can easily
4952 be got. We used to use the actual mode value as the bit number, but there
4953 are more than 32 modes now. Instead we use two tables: one indexed by
4954 hard register number, and one indexed by mode. */
4956 /* The purpose of sparc_mode_class is to shrink the range of modes so that
4957 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
4958 mapped into one sparc_mode_class mode. */
4960 enum sparc_mode_class {
4961 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
4962 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
4963 CC_MODE, CCFP_MODE
4966 /* Modes for single-word and smaller quantities. */
4967 #define S_MODES \
4968 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
4970 /* Modes for double-word and smaller quantities. */
4971 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4973 /* Modes for quad-word and smaller quantities. */
4974 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
4976 /* Modes for 8-word and smaller quantities. */
4977 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
4979 /* Modes for single-float quantities. */
4980 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
4982 /* Modes for double-float and smaller quantities. */
4983 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
4985 /* Modes for quad-float and smaller quantities. */
4986 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
4988 /* Modes for quad-float pairs and smaller quantities. */
4989 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
4991 /* Modes for double-float only quantities. */
4992 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
4994 /* Modes for quad-float and double-float only quantities. */
4995 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
4997 /* Modes for quad-float pairs and double-float only quantities. */
4998 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5000 /* Modes for condition codes. */
5001 #define CC_MODES (1 << (int) CC_MODE)
5002 #define CCFP_MODES (1 << (int) CCFP_MODE)
5004 /* Value is 1 if register/mode pair is acceptable on sparc.
5006 The funny mixture of D and T modes is because integer operations
5007 do not specially operate on tetra quantities, so non-quad-aligned
5008 registers can hold quadword quantities (except %o4 and %i4 because
5009 they cross fixed registers).
5011 ??? Note that, despite the settings, non-double-aligned parameter
5012 registers can hold double-word quantities in 32-bit mode. */
5014 /* This points to either the 32-bit or the 64-bit version. */
5015 static const int *hard_regno_mode_classes;
5017 static const int hard_32bit_mode_classes[] = {
5018 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5019 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5020 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5021 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5023 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5024 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5025 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5026 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5028 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5029 and none can hold SFmode/SImode values. */
5030 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5031 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5032 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5033 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5035 /* %fcc[0123] */
5036 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5038 /* %icc, %sfp, %gsr */
5039 CC_MODES, 0, D_MODES
5042 static const int hard_64bit_mode_classes[] = {
5043 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5044 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5045 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5046 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5048 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5049 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5050 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5051 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5053 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5054 and none can hold SFmode/SImode values. */
5055 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5056 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5057 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5058 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5060 /* %fcc[0123] */
5061 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5063 /* %icc, %sfp, %gsr */
5064 CC_MODES, 0, D_MODES
5067 static int sparc_mode_class [NUM_MACHINE_MODES];
5069 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5071 static void
5072 sparc_init_modes (void)
5074 int i;
5076 for (i = 0; i < NUM_MACHINE_MODES; i++)
5078 machine_mode m = (machine_mode) i;
5079 unsigned int size = GET_MODE_SIZE (m);
5081 switch (GET_MODE_CLASS (m))
5083 case MODE_INT:
5084 case MODE_PARTIAL_INT:
5085 case MODE_COMPLEX_INT:
5086 if (size < 4)
5087 sparc_mode_class[i] = 1 << (int) H_MODE;
5088 else if (size == 4)
5089 sparc_mode_class[i] = 1 << (int) S_MODE;
5090 else if (size == 8)
5091 sparc_mode_class[i] = 1 << (int) D_MODE;
5092 else if (size == 16)
5093 sparc_mode_class[i] = 1 << (int) T_MODE;
5094 else if (size == 32)
5095 sparc_mode_class[i] = 1 << (int) O_MODE;
5096 else
5097 sparc_mode_class[i] = 0;
5098 break;
5099 case MODE_VECTOR_INT:
5100 if (size == 4)
5101 sparc_mode_class[i] = 1 << (int) SF_MODE;
5102 else if (size == 8)
5103 sparc_mode_class[i] = 1 << (int) DF_MODE;
5104 else
5105 sparc_mode_class[i] = 0;
5106 break;
5107 case MODE_FLOAT:
5108 case MODE_COMPLEX_FLOAT:
5109 if (size == 4)
5110 sparc_mode_class[i] = 1 << (int) SF_MODE;
5111 else if (size == 8)
5112 sparc_mode_class[i] = 1 << (int) DF_MODE;
5113 else if (size == 16)
5114 sparc_mode_class[i] = 1 << (int) TF_MODE;
5115 else if (size == 32)
5116 sparc_mode_class[i] = 1 << (int) OF_MODE;
5117 else
5118 sparc_mode_class[i] = 0;
5119 break;
5120 case MODE_CC:
5121 if (m == CCFPmode || m == CCFPEmode)
5122 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5123 else
5124 sparc_mode_class[i] = 1 << (int) CC_MODE;
5125 break;
5126 default:
5127 sparc_mode_class[i] = 0;
5128 break;
5132 if (TARGET_ARCH64)
5133 hard_regno_mode_classes = hard_64bit_mode_classes;
5134 else
5135 hard_regno_mode_classes = hard_32bit_mode_classes;
5137 /* Initialize the array used by REGNO_REG_CLASS. */
5138 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5140 if (i < 16 && TARGET_V8PLUS)
5141 sparc_regno_reg_class[i] = I64_REGS;
5142 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5143 sparc_regno_reg_class[i] = GENERAL_REGS;
5144 else if (i < 64)
5145 sparc_regno_reg_class[i] = FP_REGS;
5146 else if (i < 96)
5147 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5148 else if (i < 100)
5149 sparc_regno_reg_class[i] = FPCC_REGS;
5150 else
5151 sparc_regno_reg_class[i] = NO_REGS;
5155 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5157 static inline bool
5158 save_global_or_fp_reg_p (unsigned int regno,
5159 int leaf_function ATTRIBUTE_UNUSED)
5161 return !call_used_regs[regno] && df_regs_ever_live_p (regno);
5164 /* Return whether the return address register (%i7) is needed. */
5166 static inline bool
5167 return_addr_reg_needed_p (int leaf_function)
5169 /* If it is live, for example because of __builtin_return_address (0). */
5170 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5171 return true;
5173 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5174 if (!leaf_function
5175 /* Loading the GOT register clobbers %o7. */
5176 || crtl->uses_pic_offset_table
5177 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5178 return true;
5180 return false;
5183 /* Return whether REGNO, a local or in register, must be saved/restored. */
5185 static bool
5186 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5188 /* General case: call-saved registers live at some point. */
5189 if (!call_used_regs[regno] && df_regs_ever_live_p (regno))
5190 return true;
5192 /* Frame pointer register (%fp) if needed. */
5193 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5194 return true;
5196 /* Return address register (%i7) if needed. */
5197 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5198 return true;
5200 /* GOT register (%l7) if needed. */
5201 if (regno == PIC_OFFSET_TABLE_REGNUM && crtl->uses_pic_offset_table)
5202 return true;
5204 /* If the function accesses prior frames, the frame pointer and the return
5205 address of the previous frame must be saved on the stack. */
5206 if (crtl->accesses_prior_frames
5207 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5208 return true;
5210 return false;
5213 /* Compute the frame size required by the function. This function is called
5214 during the reload pass and also by sparc_expand_prologue. */
5216 HOST_WIDE_INT
5217 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5219 HOST_WIDE_INT frame_size, apparent_frame_size;
5220 int args_size, n_global_fp_regs = 0;
5221 bool save_local_in_regs_p = false;
5222 unsigned int i;
5224 /* If the function allocates dynamic stack space, the dynamic offset is
5225 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5226 if (leaf_function && !cfun->calls_alloca)
5227 args_size = 0;
5228 else
5229 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5231 /* Calculate space needed for global registers. */
5232 if (TARGET_ARCH64)
5234 for (i = 0; i < 8; i++)
5235 if (save_global_or_fp_reg_p (i, 0))
5236 n_global_fp_regs += 2;
5238 else
5240 for (i = 0; i < 8; i += 2)
5241 if (save_global_or_fp_reg_p (i, 0)
5242 || save_global_or_fp_reg_p (i + 1, 0))
5243 n_global_fp_regs += 2;
5246 /* In the flat window model, find out which local and in registers need to
5247 be saved. We don't reserve space in the current frame for them as they
5248 will be spilled into the register window save area of the caller's frame.
5249 However, as soon as we use this register window save area, we must create
5250 that of the current frame to make it the live one. */
5251 if (TARGET_FLAT)
5252 for (i = 16; i < 32; i++)
5253 if (save_local_or_in_reg_p (i, leaf_function))
5255 save_local_in_regs_p = true;
5256 break;
5259 /* Calculate space needed for FP registers. */
5260 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5261 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5262 n_global_fp_regs += 2;
5264 if (size == 0
5265 && n_global_fp_regs == 0
5266 && args_size == 0
5267 && !save_local_in_regs_p)
5268 frame_size = apparent_frame_size = 0;
5269 else
5271 /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
5272 apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
5273 apparent_frame_size += n_global_fp_regs * 4;
5275 /* We need to add the size of the outgoing argument area. */
5276 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5278 /* And that of the register window save area. */
5279 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5281 /* Finally, bump to the appropriate alignment. */
5282 frame_size = SPARC_STACK_ALIGN (frame_size);
5285 /* Set up values for use in prologue and epilogue. */
5286 sparc_frame_size = frame_size;
5287 sparc_apparent_frame_size = apparent_frame_size;
5288 sparc_n_global_fp_regs = n_global_fp_regs;
5289 sparc_save_local_in_regs_p = save_local_in_regs_p;
5291 return frame_size;
5294 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5297 sparc_initial_elimination_offset (int to)
5299 int offset;
5301 if (to == STACK_POINTER_REGNUM)
5302 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5303 else
5304 offset = 0;
5306 offset += SPARC_STACK_BIAS;
5307 return offset;
5310 /* Output any necessary .register pseudo-ops. */
5312 void
5313 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5315 #ifdef HAVE_AS_REGISTER_PSEUDO_OP
5316 int i;
5318 if (TARGET_ARCH32)
5319 return;
5321 /* Check if %g[2367] were used without
5322 .register being printed for them already. */
5323 for (i = 2; i < 8; i++)
5325 if (df_regs_ever_live_p (i)
5326 && ! sparc_hard_reg_printed [i])
5328 sparc_hard_reg_printed [i] = 1;
5329 /* %g7 is used as TLS base register, use #ignore
5330 for it instead of #scratch. */
5331 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5332 i == 7 ? "ignore" : "scratch");
5334 if (i == 3) i = 5;
5336 #endif
5339 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5341 #if PROBE_INTERVAL > 4096
5342 #error Cannot use indexed addressing mode for stack probing
5343 #endif
5345 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5346 inclusive. These are offsets from the current stack pointer.
5348 Note that we don't use the REG+REG addressing mode for the probes because
5349 of the stack bias in 64-bit mode. And it doesn't really buy us anything
5350 so the advantages of having a single code win here. */
5352 static void
5353 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5355 rtx g1 = gen_rtx_REG (Pmode, 1);
5357 /* See if we have a constant small number of probes to generate. If so,
5358 that's the easy case. */
5359 if (size <= PROBE_INTERVAL)
5361 emit_move_insn (g1, GEN_INT (first));
5362 emit_insn (gen_rtx_SET (g1,
5363 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5364 emit_stack_probe (plus_constant (Pmode, g1, -size));
5367 /* The run-time loop is made up of 9 insns in the generic case while the
5368 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5369 else if (size <= 4 * PROBE_INTERVAL)
5371 HOST_WIDE_INT i;
5373 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5374 emit_insn (gen_rtx_SET (g1,
5375 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5376 emit_stack_probe (g1);
5378 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5379 it exceeds SIZE. If only two probes are needed, this will not
5380 generate any code. Then probe at FIRST + SIZE. */
5381 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5383 emit_insn (gen_rtx_SET (g1,
5384 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5385 emit_stack_probe (g1);
5388 emit_stack_probe (plus_constant (Pmode, g1,
5389 (i - PROBE_INTERVAL) - size));
5392 /* Otherwise, do the same as above, but in a loop. Note that we must be
5393 extra careful with variables wrapping around because we might be at
5394 the very top (or the very bottom) of the address space and we have
5395 to be able to handle this case properly; in particular, we use an
5396 equality test for the loop condition. */
5397 else
5399 HOST_WIDE_INT rounded_size;
5400 rtx g4 = gen_rtx_REG (Pmode, 4);
5402 emit_move_insn (g1, GEN_INT (first));
5405 /* Step 1: round SIZE to the previous multiple of the interval. */
5407 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5408 emit_move_insn (g4, GEN_INT (rounded_size));
5411 /* Step 2: compute initial and final value of the loop counter. */
5413 /* TEST_ADDR = SP + FIRST. */
5414 emit_insn (gen_rtx_SET (g1,
5415 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5417 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5418 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5421 /* Step 3: the loop
5423 while (TEST_ADDR != LAST_ADDR)
5425 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5426 probe at TEST_ADDR
5429 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5430 until it is equal to ROUNDED_SIZE. */
5432 if (TARGET_ARCH64)
5433 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5434 else
5435 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5438 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5439 that SIZE is equal to ROUNDED_SIZE. */
5441 if (size != rounded_size)
5442 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5445 /* Make sure nothing is scheduled before we are done. */
5446 emit_insn (gen_blockage ());
5449 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5450 absolute addresses. */
5452 const char *
5453 output_probe_stack_range (rtx reg1, rtx reg2)
5455 static int labelno = 0;
5456 char loop_lab[32];
5457 rtx xops[2];
5459 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5461 /* Loop. */
5462 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5464 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5465 xops[0] = reg1;
5466 xops[1] = GEN_INT (-PROBE_INTERVAL);
5467 output_asm_insn ("add\t%0, %1, %0", xops);
5469 /* Test if TEST_ADDR == LAST_ADDR. */
5470 xops[1] = reg2;
5471 output_asm_insn ("cmp\t%0, %1", xops);
5473 /* Probe at TEST_ADDR and branch. */
5474 if (TARGET_ARCH64)
5475 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5476 else
5477 fputs ("\tbne\t", asm_out_file);
5478 assemble_name_raw (asm_out_file, loop_lab);
5479 fputc ('\n', asm_out_file);
5480 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5481 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5483 return "";
5486 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5487 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5488 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5489 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5490 the action to be performed if it returns false. Return the new offset. */
5492 typedef bool (*sorr_pred_t) (unsigned int, int);
5493 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5495 static int
5496 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5497 int offset, int leaf_function, sorr_pred_t save_p,
5498 sorr_act_t action_true, sorr_act_t action_false)
5500 unsigned int i;
5501 rtx mem;
5502 rtx_insn *insn;
5504 if (TARGET_ARCH64 && high <= 32)
5506 int fp_offset = -1;
5508 for (i = low; i < high; i++)
5510 if (save_p (i, leaf_function))
5512 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5513 base, offset));
5514 if (action_true == SORR_SAVE)
5516 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5517 RTX_FRAME_RELATED_P (insn) = 1;
5519 else /* action_true == SORR_RESTORE */
5521 /* The frame pointer must be restored last since its old
5522 value may be used as base address for the frame. This
5523 is problematic in 64-bit mode only because of the lack
5524 of double-word load instruction. */
5525 if (i == HARD_FRAME_POINTER_REGNUM)
5526 fp_offset = offset;
5527 else
5528 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5530 offset += 8;
5532 else if (action_false == SORR_ADVANCE)
5533 offset += 8;
5536 if (fp_offset >= 0)
5538 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5539 emit_move_insn (hard_frame_pointer_rtx, mem);
5542 else
5544 for (i = low; i < high; i += 2)
5546 bool reg0 = save_p (i, leaf_function);
5547 bool reg1 = save_p (i + 1, leaf_function);
5548 machine_mode mode;
5549 int regno;
5551 if (reg0 && reg1)
5553 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5554 regno = i;
5556 else if (reg0)
5558 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5559 regno = i;
5561 else if (reg1)
5563 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5564 regno = i + 1;
5565 offset += 4;
5567 else
5569 if (action_false == SORR_ADVANCE)
5570 offset += 8;
5571 continue;
5574 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5575 if (action_true == SORR_SAVE)
5577 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5578 RTX_FRAME_RELATED_P (insn) = 1;
5579 if (mode == DImode)
5581 rtx set1, set2;
5582 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5583 offset));
5584 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5585 RTX_FRAME_RELATED_P (set1) = 1;
5587 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5588 offset + 4));
5589 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5590 RTX_FRAME_RELATED_P (set2) = 1;
5591 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5592 gen_rtx_PARALLEL (VOIDmode,
5593 gen_rtvec (2, set1, set2)));
5596 else /* action_true == SORR_RESTORE */
5597 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5599 /* Bump and round down to double word
5600 in case we already bumped by 4. */
5601 offset = ROUND_DOWN (offset + 8, 8);
5605 return offset;
5608 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5610 static rtx
5611 emit_adjust_base_to_offset (rtx base, int offset)
5613 /* ??? This might be optimized a little as %g1 might already have a
5614 value close enough that a single add insn will do. */
5615 /* ??? Although, all of this is probably only a temporary fix because
5616 if %g1 can hold a function result, then sparc_expand_epilogue will
5617 lose (the result will be clobbered). */
5618 rtx new_base = gen_rtx_REG (Pmode, 1);
5619 emit_move_insn (new_base, GEN_INT (offset));
5620 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5621 return new_base;
5624 /* Emit code to save/restore call-saved global and FP registers. */
5626 static void
5627 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5629 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5631 base = emit_adjust_base_to_offset (base, offset);
5632 offset = 0;
5635 offset
5636 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5637 save_global_or_fp_reg_p, action, SORR_NONE);
5638 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5639 save_global_or_fp_reg_p, action, SORR_NONE);
5642 /* Emit code to save/restore call-saved local and in registers. */
5644 static void
5645 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5647 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5649 base = emit_adjust_base_to_offset (base, offset);
5650 offset = 0;
5653 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5654 save_local_or_in_reg_p, action, SORR_ADVANCE);
5657 /* Emit a window_save insn. */
5659 static rtx_insn *
5660 emit_window_save (rtx increment)
5662 rtx_insn *insn = emit_insn (gen_window_save (increment));
5663 RTX_FRAME_RELATED_P (insn) = 1;
5665 /* The incoming return address (%o7) is saved in %i7. */
5666 add_reg_note (insn, REG_CFA_REGISTER,
5667 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5668 gen_rtx_REG (Pmode,
5669 INCOMING_RETURN_ADDR_REGNUM)));
5671 /* The window save event. */
5672 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5674 /* The CFA is %fp, the hard frame pointer. */
5675 add_reg_note (insn, REG_CFA_DEF_CFA,
5676 plus_constant (Pmode, hard_frame_pointer_rtx,
5677 INCOMING_FRAME_SP_OFFSET));
5679 return insn;
5682 /* Generate an increment for the stack pointer. */
5684 static rtx
5685 gen_stack_pointer_inc (rtx increment)
5687 return gen_rtx_SET (stack_pointer_rtx,
5688 gen_rtx_PLUS (Pmode,
5689 stack_pointer_rtx,
5690 increment));
5693 /* Expand the function prologue. The prologue is responsible for reserving
5694 storage for the frame, saving the call-saved registers and loading the
5695 GOT register if needed. */
5697 void
5698 sparc_expand_prologue (void)
5700 HOST_WIDE_INT size;
5701 rtx_insn *insn;
5703 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5704 on the final value of the flag means deferring the prologue/epilogue
5705 expansion until just before the second scheduling pass, which is too
5706 late to emit multiple epilogues or return insns.
5708 Of course we are making the assumption that the value of the flag
5709 will not change between now and its final value. Of the three parts
5710 of the formula, only the last one can reasonably vary. Let's take a
5711 closer look, after assuming that the first two ones are set to true
5712 (otherwise the last value is effectively silenced).
5714 If only_leaf_regs_used returns false, the global predicate will also
5715 be false so the actual frame size calculated below will be positive.
5716 As a consequence, the save_register_window insn will be emitted in
5717 the instruction stream; now this insn explicitly references %fp
5718 which is not a leaf register so only_leaf_regs_used will always
5719 return false subsequently.
5721 If only_leaf_regs_used returns true, we hope that the subsequent
5722 optimization passes won't cause non-leaf registers to pop up. For
5723 example, the regrename pass has special provisions to not rename to
5724 non-leaf registers in a leaf function. */
5725 sparc_leaf_function_p
5726 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
5728 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5730 if (flag_stack_usage_info)
5731 current_function_static_stack_size = size;
5733 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5735 if (crtl->is_leaf && !cfun->calls_alloca)
5737 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5738 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5739 size - STACK_CHECK_PROTECT);
5741 else if (size > 0)
5742 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5745 if (size == 0)
5746 ; /* do nothing. */
5747 else if (sparc_leaf_function_p)
5749 rtx size_int_rtx = GEN_INT (-size);
5751 if (size <= 4096)
5752 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5753 else if (size <= 8192)
5755 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5756 RTX_FRAME_RELATED_P (insn) = 1;
5758 /* %sp is still the CFA register. */
5759 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5761 else
5763 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5764 emit_move_insn (size_rtx, size_int_rtx);
5765 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5766 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5767 gen_stack_pointer_inc (size_int_rtx));
5770 RTX_FRAME_RELATED_P (insn) = 1;
5772 else
5774 rtx size_int_rtx = GEN_INT (-size);
5776 if (size <= 4096)
5777 emit_window_save (size_int_rtx);
5778 else if (size <= 8192)
5780 emit_window_save (GEN_INT (-4096));
5782 /* %sp is not the CFA register anymore. */
5783 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5785 /* Make sure no %fp-based store is issued until after the frame is
5786 established. The offset between the frame pointer and the stack
5787 pointer is calculated relative to the value of the stack pointer
5788 at the end of the function prologue, and moving instructions that
5789 access the stack via the frame pointer between the instructions
5790 that decrement the stack pointer could result in accessing the
5791 register window save area, which is volatile. */
5792 emit_insn (gen_frame_blockage ());
5794 else
5796 rtx size_rtx = gen_rtx_REG (Pmode, 1);
5797 emit_move_insn (size_rtx, size_int_rtx);
5798 emit_window_save (size_rtx);
5802 if (sparc_leaf_function_p)
5804 sparc_frame_base_reg = stack_pointer_rtx;
5805 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5807 else
5809 sparc_frame_base_reg = hard_frame_pointer_rtx;
5810 sparc_frame_base_offset = SPARC_STACK_BIAS;
5813 if (sparc_n_global_fp_regs > 0)
5814 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5815 sparc_frame_base_offset
5816 - sparc_apparent_frame_size,
5817 SORR_SAVE);
5819 /* Load the GOT register if needed. */
5820 if (crtl->uses_pic_offset_table)
5821 load_got_register ();
5823 /* Advertise that the data calculated just above are now valid. */
5824 sparc_prologue_data_valid_p = true;
5827 /* Expand the function prologue. The prologue is responsible for reserving
5828 storage for the frame, saving the call-saved registers and loading the
5829 GOT register if needed. */
5831 void
5832 sparc_flat_expand_prologue (void)
5834 HOST_WIDE_INT size;
5835 rtx_insn *insn;
5837 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
5839 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
5841 if (flag_stack_usage_info)
5842 current_function_static_stack_size = size;
5844 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
5846 if (crtl->is_leaf && !cfun->calls_alloca)
5848 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
5849 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT,
5850 size - STACK_CHECK_PROTECT);
5852 else if (size > 0)
5853 sparc_emit_probe_stack_range (STACK_CHECK_PROTECT, size);
5856 if (sparc_save_local_in_regs_p)
5857 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
5858 SORR_SAVE);
5860 if (size == 0)
5861 ; /* do nothing. */
5862 else
5864 rtx size_int_rtx, size_rtx;
5866 size_rtx = size_int_rtx = GEN_INT (-size);
5868 /* We establish the frame (i.e. decrement the stack pointer) first, even
5869 if we use a frame pointer, because we cannot clobber any call-saved
5870 registers, including the frame pointer, if we haven't created a new
5871 register save area, for the sake of compatibility with the ABI. */
5872 if (size <= 4096)
5873 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
5874 else if (size <= 8192 && !frame_pointer_needed)
5876 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
5877 RTX_FRAME_RELATED_P (insn) = 1;
5878 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
5880 else
5882 size_rtx = gen_rtx_REG (Pmode, 1);
5883 emit_move_insn (size_rtx, size_int_rtx);
5884 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
5885 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5886 gen_stack_pointer_inc (size_int_rtx));
5888 RTX_FRAME_RELATED_P (insn) = 1;
5890 /* Ensure nothing is scheduled until after the frame is established. */
5891 emit_insn (gen_blockage ());
5893 if (frame_pointer_needed)
5895 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
5896 gen_rtx_MINUS (Pmode,
5897 stack_pointer_rtx,
5898 size_rtx)));
5899 RTX_FRAME_RELATED_P (insn) = 1;
5901 add_reg_note (insn, REG_CFA_ADJUST_CFA,
5902 gen_rtx_SET (hard_frame_pointer_rtx,
5903 plus_constant (Pmode, stack_pointer_rtx,
5904 size)));
5907 if (return_addr_reg_needed_p (sparc_leaf_function_p))
5909 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
5910 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
5912 insn = emit_move_insn (i7, o7);
5913 RTX_FRAME_RELATED_P (insn) = 1;
5915 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
5917 /* Prevent this instruction from ever being considered dead,
5918 even if this function has no epilogue. */
5919 emit_use (i7);
5923 if (frame_pointer_needed)
5925 sparc_frame_base_reg = hard_frame_pointer_rtx;
5926 sparc_frame_base_offset = SPARC_STACK_BIAS;
5928 else
5930 sparc_frame_base_reg = stack_pointer_rtx;
5931 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
5934 if (sparc_n_global_fp_regs > 0)
5935 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5936 sparc_frame_base_offset
5937 - sparc_apparent_frame_size,
5938 SORR_SAVE);
5940 /* Load the GOT register if needed. */
5941 if (crtl->uses_pic_offset_table)
5942 load_got_register ();
5944 /* Advertise that the data calculated just above are now valid. */
5945 sparc_prologue_data_valid_p = true;
5948 /* This function generates the assembly code for function entry, which boils
5949 down to emitting the necessary .register directives. */
5951 static void
5952 sparc_asm_function_prologue (FILE *file)
5954 /* Check that the assumption we made in sparc_expand_prologue is valid. */
5955 if (!TARGET_FLAT)
5956 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
5958 sparc_output_scratch_registers (file);
5961 /* Expand the function epilogue, either normal or part of a sibcall.
5962 We emit all the instructions except the return or the call. */
5964 void
5965 sparc_expand_epilogue (bool for_eh)
5967 HOST_WIDE_INT size = sparc_frame_size;
5969 if (cfun->calls_alloca)
5970 emit_insn (gen_frame_blockage ());
5972 if (sparc_n_global_fp_regs > 0)
5973 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
5974 sparc_frame_base_offset
5975 - sparc_apparent_frame_size,
5976 SORR_RESTORE);
5978 if (size == 0 || for_eh)
5979 ; /* do nothing. */
5980 else if (sparc_leaf_function_p)
5982 if (size <= 4096)
5983 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
5984 else if (size <= 8192)
5986 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
5987 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
5989 else
5991 rtx reg = gen_rtx_REG (Pmode, 1);
5992 emit_move_insn (reg, GEN_INT (size));
5993 emit_insn (gen_stack_pointer_inc (reg));
5998 /* Expand the function epilogue, either normal or part of a sibcall.
5999 We emit all the instructions except the return or the call. */
6001 void
6002 sparc_flat_expand_epilogue (bool for_eh)
6004 HOST_WIDE_INT size = sparc_frame_size;
6006 if (sparc_n_global_fp_regs > 0)
6007 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6008 sparc_frame_base_offset
6009 - sparc_apparent_frame_size,
6010 SORR_RESTORE);
6012 /* If we have a frame pointer, we'll need both to restore it before the
6013 frame is destroyed and use its current value in destroying the frame.
6014 Since we don't have an atomic way to do that in the flat window model,
6015 we save the current value into a temporary register (%g1). */
6016 if (frame_pointer_needed && !for_eh)
6017 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6019 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6020 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6021 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6023 if (sparc_save_local_in_regs_p)
6024 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6025 sparc_frame_base_offset,
6026 SORR_RESTORE);
6028 if (size == 0 || for_eh)
6029 ; /* do nothing. */
6030 else if (frame_pointer_needed)
6032 /* Make sure the frame is destroyed after everything else is done. */
6033 emit_insn (gen_blockage ());
6035 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6037 else
6039 /* Likewise. */
6040 emit_insn (gen_blockage ());
6042 if (size <= 4096)
6043 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6044 else if (size <= 8192)
6046 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6047 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6049 else
6051 rtx reg = gen_rtx_REG (Pmode, 1);
6052 emit_move_insn (reg, GEN_INT (size));
6053 emit_insn (gen_stack_pointer_inc (reg));
6058 /* Return true if it is appropriate to emit `return' instructions in the
6059 body of a function. */
6061 bool
6062 sparc_can_use_return_insn_p (void)
6064 return sparc_prologue_data_valid_p
6065 && sparc_n_global_fp_regs == 0
6066 && TARGET_FLAT
6067 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6068 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6071 /* This function generates the assembly code for function exit. */
6073 static void
6074 sparc_asm_function_epilogue (FILE *file)
6076 /* If the last two instructions of a function are "call foo; dslot;"
6077 the return address might point to the first instruction in the next
6078 function and we have to output a dummy nop for the sake of sane
6079 backtraces in such cases. This is pointless for sibling calls since
6080 the return address is explicitly adjusted. */
6082 rtx_insn *insn = get_last_insn ();
6084 rtx last_real_insn = prev_real_insn (insn);
6085 if (last_real_insn
6086 && NONJUMP_INSN_P (last_real_insn)
6087 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6088 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6090 if (last_real_insn
6091 && CALL_P (last_real_insn)
6092 && !SIBLING_CALL_P (last_real_insn))
6093 fputs("\tnop\n", file);
6095 sparc_output_deferred_case_vectors ();
6098 /* Output a 'restore' instruction. */
6100 static void
6101 output_restore (rtx pat)
6103 rtx operands[3];
6105 if (! pat)
6107 fputs ("\t restore\n", asm_out_file);
6108 return;
6111 gcc_assert (GET_CODE (pat) == SET);
6113 operands[0] = SET_DEST (pat);
6114 pat = SET_SRC (pat);
6116 switch (GET_CODE (pat))
6118 case PLUS:
6119 operands[1] = XEXP (pat, 0);
6120 operands[2] = XEXP (pat, 1);
6121 output_asm_insn (" restore %r1, %2, %Y0", operands);
6122 break;
6123 case LO_SUM:
6124 operands[1] = XEXP (pat, 0);
6125 operands[2] = XEXP (pat, 1);
6126 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6127 break;
6128 case ASHIFT:
6129 operands[1] = XEXP (pat, 0);
6130 gcc_assert (XEXP (pat, 1) == const1_rtx);
6131 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6132 break;
6133 default:
6134 operands[1] = pat;
6135 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6136 break;
6140 /* Output a return. */
6142 const char *
6143 output_return (rtx_insn *insn)
6145 if (crtl->calls_eh_return)
6147 /* If the function uses __builtin_eh_return, the eh_return
6148 machinery occupies the delay slot. */
6149 gcc_assert (!final_sequence);
6151 if (flag_delayed_branch)
6153 if (!TARGET_FLAT && TARGET_V9)
6154 fputs ("\treturn\t%i7+8\n", asm_out_file);
6155 else
6157 if (!TARGET_FLAT)
6158 fputs ("\trestore\n", asm_out_file);
6160 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6163 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6165 else
6167 if (!TARGET_FLAT)
6168 fputs ("\trestore\n", asm_out_file);
6170 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6171 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6174 else if (sparc_leaf_function_p || TARGET_FLAT)
6176 /* This is a leaf or flat function so we don't have to bother restoring
6177 the register window, which frees us from dealing with the convoluted
6178 semantics of restore/return. We simply output the jump to the
6179 return address and the insn in the delay slot (if any). */
6181 return "jmp\t%%o7+%)%#";
6183 else
6185 /* This is a regular function so we have to restore the register window.
6186 We may have a pending insn for the delay slot, which will be either
6187 combined with the 'restore' instruction or put in the delay slot of
6188 the 'return' instruction. */
6190 if (final_sequence)
6192 rtx_insn *delay;
6193 rtx pat;
6194 int seen;
6196 delay = NEXT_INSN (insn);
6197 gcc_assert (delay);
6199 pat = PATTERN (delay);
6201 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6203 epilogue_renumber (&pat, 0);
6204 return "return\t%%i7+%)%#";
6206 else
6208 output_asm_insn ("jmp\t%%i7+%)", NULL);
6210 /* We're going to output the insn in the delay slot manually.
6211 Make sure to output its source location first. */
6212 PATTERN (delay) = gen_blockage ();
6213 INSN_CODE (delay) = -1;
6214 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6215 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6217 output_restore (pat);
6220 else
6222 /* The delay slot is empty. */
6223 if (TARGET_V9)
6224 return "return\t%%i7+%)\n\t nop";
6225 else if (flag_delayed_branch)
6226 return "jmp\t%%i7+%)\n\t restore";
6227 else
6228 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6232 return "";
6235 /* Output a sibling call. */
6237 const char *
6238 output_sibcall (rtx_insn *insn, rtx call_operand)
6240 rtx operands[1];
6242 gcc_assert (flag_delayed_branch);
6244 operands[0] = call_operand;
6246 if (sparc_leaf_function_p || TARGET_FLAT)
6248 /* This is a leaf or flat function so we don't have to bother restoring
6249 the register window. We simply output the jump to the function and
6250 the insn in the delay slot (if any). */
6252 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6254 if (final_sequence)
6255 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6256 operands);
6257 else
6258 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6259 it into branch if possible. */
6260 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6261 operands);
6263 else
6265 /* This is a regular function so we have to restore the register window.
6266 We may have a pending insn for the delay slot, which will be combined
6267 with the 'restore' instruction. */
6269 output_asm_insn ("call\t%a0, 0", operands);
6271 if (final_sequence)
6273 rtx_insn *delay;
6274 rtx pat;
6275 int seen;
6277 delay = NEXT_INSN (insn);
6278 gcc_assert (delay);
6280 pat = PATTERN (delay);
6282 /* We're going to output the insn in the delay slot manually.
6283 Make sure to output its source location first. */
6284 PATTERN (delay) = gen_blockage ();
6285 INSN_CODE (delay) = -1;
6286 final_scan_insn (delay, asm_out_file, optimize, 0, &seen);
6287 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6289 output_restore (pat);
6291 else
6292 output_restore (NULL_RTX);
6295 return "";
6298 /* Functions for handling argument passing.
6300 For 32-bit, the first 6 args are normally in registers and the rest are
6301 pushed. Any arg that starts within the first 6 words is at least
6302 partially passed in a register unless its data type forbids.
6304 For 64-bit, the argument registers are laid out as an array of 16 elements
6305 and arguments are added sequentially. The first 6 int args and up to the
6306 first 16 fp args (depending on size) are passed in regs.
6308 Slot Stack Integral Float Float in structure Double Long Double
6309 ---- ----- -------- ----- ------------------ ------ -----------
6310 15 [SP+248] %f31 %f30,%f31 %d30
6311 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6312 13 [SP+232] %f27 %f26,%f27 %d26
6313 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6314 11 [SP+216] %f23 %f22,%f23 %d22
6315 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6316 9 [SP+200] %f19 %f18,%f19 %d18
6317 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6318 7 [SP+184] %f15 %f14,%f15 %d14
6319 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6320 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6321 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6322 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6323 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6324 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6325 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6327 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6329 Integral arguments are always passed as 64-bit quantities appropriately
6330 extended.
6332 Passing of floating point values is handled as follows.
6333 If a prototype is in scope:
6334 If the value is in a named argument (i.e. not a stdarg function or a
6335 value not part of the `...') then the value is passed in the appropriate
6336 fp reg.
6337 If the value is part of the `...' and is passed in one of the first 6
6338 slots then the value is passed in the appropriate int reg.
6339 If the value is part of the `...' and is not passed in one of the first 6
6340 slots then the value is passed in memory.
6341 If a prototype is not in scope:
6342 If the value is one of the first 6 arguments the value is passed in the
6343 appropriate integer reg and the appropriate fp reg.
6344 If the value is not one of the first 6 arguments the value is passed in
6345 the appropriate fp reg and in memory.
6348 Summary of the calling conventions implemented by GCC on the SPARC:
6350 32-bit ABI:
6351 size argument return value
6353 small integer <4 int. reg. int. reg.
6354 word 4 int. reg. int. reg.
6355 double word 8 int. reg. int. reg.
6357 _Complex small integer <8 int. reg. int. reg.
6358 _Complex word 8 int. reg. int. reg.
6359 _Complex double word 16 memory int. reg.
6361 vector integer <=8 int. reg. FP reg.
6362 vector integer >8 memory memory
6364 float 4 int. reg. FP reg.
6365 double 8 int. reg. FP reg.
6366 long double 16 memory memory
6368 _Complex float 8 memory FP reg.
6369 _Complex double 16 memory FP reg.
6370 _Complex long double 32 memory FP reg.
6372 vector float any memory memory
6374 aggregate any memory memory
6378 64-bit ABI:
6379 size argument return value
6381 small integer <8 int. reg. int. reg.
6382 word 8 int. reg. int. reg.
6383 double word 16 int. reg. int. reg.
6385 _Complex small integer <16 int. reg. int. reg.
6386 _Complex word 16 int. reg. int. reg.
6387 _Complex double word 32 memory int. reg.
6389 vector integer <=16 FP reg. FP reg.
6390 vector integer 16<s<=32 memory FP reg.
6391 vector integer >32 memory memory
6393 float 4 FP reg. FP reg.
6394 double 8 FP reg. FP reg.
6395 long double 16 FP reg. FP reg.
6397 _Complex float 8 FP reg. FP reg.
6398 _Complex double 16 FP reg. FP reg.
6399 _Complex long double 32 memory FP reg.
6401 vector float <=16 FP reg. FP reg.
6402 vector float 16<s<=32 memory FP reg.
6403 vector float >32 memory memory
6405 aggregate <=16 reg. reg.
6406 aggregate 16<s<=32 memory reg.
6407 aggregate >32 memory memory
6411 Note #1: complex floating-point types follow the extended SPARC ABIs as
6412 implemented by the Sun compiler.
6414 Note #2: integral vector types follow the scalar floating-point types
6415 conventions to match what is implemented by the Sun VIS SDK.
6417 Note #3: floating-point vector types follow the aggregate types
6418 conventions. */
6421 /* Maximum number of int regs for args. */
6422 #define SPARC_INT_ARG_MAX 6
6423 /* Maximum number of fp regs for args. */
6424 #define SPARC_FP_ARG_MAX 16
6425 /* Number of words (partially) occupied for a given size in units. */
6426 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6428 /* Handle the INIT_CUMULATIVE_ARGS macro.
6429 Initialize a variable CUM of type CUMULATIVE_ARGS
6430 for a call to a function whose data type is FNTYPE.
6431 For a library call, FNTYPE is 0. */
6433 void
6434 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6436 cum->words = 0;
6437 cum->prototype_p = fntype && prototype_p (fntype);
6438 cum->libcall_p = !fntype;
6441 /* Handle promotion of pointer and integer arguments. */
6443 static machine_mode
6444 sparc_promote_function_mode (const_tree type, machine_mode mode,
6445 int *punsignedp, const_tree, int)
6447 if (type && POINTER_TYPE_P (type))
6449 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6450 return Pmode;
6453 /* Integral arguments are passed as full words, as per the ABI. */
6454 if (GET_MODE_CLASS (mode) == MODE_INT
6455 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6456 return word_mode;
6458 return mode;
6461 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6463 static bool
6464 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6466 return TARGET_ARCH64 ? true : false;
6469 /* Traverse the record TYPE recursively and call FUNC on its fields.
6470 NAMED is true if this is for a named parameter. DATA is passed
6471 to FUNC for each field. OFFSET is the starting position and
6472 PACKED is true if we are inside a packed record. */
6474 template <typename T, void Func (const_tree, HOST_WIDE_INT, bool, T*)>
6475 static void
6476 traverse_record_type (const_tree type, bool named, T *data,
6477 HOST_WIDE_INT offset = 0, bool packed = false)
6479 /* The ABI obviously doesn't specify how packed structures are passed.
6480 These are passed in integer regs if possible, otherwise memory. */
6481 if (!packed)
6482 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6483 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6485 packed = true;
6486 break;
6489 /* Walk the real fields, but skip those with no size or a zero size.
6490 ??? Fields with variable offset are handled as having zero offset. */
6491 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6492 if (TREE_CODE (field) == FIELD_DECL)
6494 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6495 continue;
6497 HOST_WIDE_INT bitpos = offset;
6498 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6499 bitpos += int_bit_position (field);
6501 tree field_type = TREE_TYPE (field);
6502 if (TREE_CODE (field_type) == RECORD_TYPE)
6503 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6504 packed);
6505 else
6507 const bool fp_type
6508 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6509 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6510 data);
6515 /* Handle recursive register classifying for structure layout. */
6517 typedef struct
6519 bool fp_regs; /* true if field eligible to FP registers. */
6520 bool fp_regs_in_first_word; /* true if such field in first word. */
6521 } classify_data_t;
6523 /* A subroutine of function_arg_slotno. Classify the field. */
6525 inline void
6526 classify_registers (const_tree, HOST_WIDE_INT bitpos, bool fp,
6527 classify_data_t *data)
6529 if (fp)
6531 data->fp_regs = true;
6532 if (bitpos < BITS_PER_WORD)
6533 data->fp_regs_in_first_word = true;
6537 /* Compute the slot number to pass an argument in.
6538 Return the slot number or -1 if passing on the stack.
6540 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6541 the preceding args and about the function being called.
6542 MODE is the argument's machine mode.
6543 TYPE is the data type of the argument (as a tree).
6544 This is null for libcalls where that information may
6545 not be available.
6546 NAMED is nonzero if this argument is a named parameter
6547 (otherwise it is an extra parameter matching an ellipsis).
6548 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6549 *PREGNO records the register number to use if scalar type.
6550 *PPADDING records the amount of padding needed in words. */
6552 static int
6553 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6554 const_tree type, bool named, bool incoming,
6555 int *pregno, int *ppadding)
6557 int regbase = (incoming
6558 ? SPARC_INCOMING_INT_ARG_FIRST
6559 : SPARC_OUTGOING_INT_ARG_FIRST);
6560 int slotno = cum->words;
6561 enum mode_class mclass;
6562 int regno;
6564 *ppadding = 0;
6566 if (type && TREE_ADDRESSABLE (type))
6567 return -1;
6569 if (TARGET_ARCH32
6570 && mode == BLKmode
6571 && type
6572 && TYPE_ALIGN (type) % PARM_BOUNDARY != 0)
6573 return -1;
6575 /* For SPARC64, objects requiring 16-byte alignment get it. */
6576 if (TARGET_ARCH64
6577 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6578 && (slotno & 1) != 0)
6579 slotno++, *ppadding = 1;
6581 mclass = GET_MODE_CLASS (mode);
6582 if (type && TREE_CODE (type) == VECTOR_TYPE)
6584 /* Vector types deserve special treatment because they are
6585 polymorphic wrt their mode, depending upon whether VIS
6586 instructions are enabled. */
6587 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6589 /* The SPARC port defines no floating-point vector modes. */
6590 gcc_assert (mode == BLKmode);
6592 else
6594 /* Integral vector types should either have a vector
6595 mode or an integral mode, because we are guaranteed
6596 by pass_by_reference that their size is not greater
6597 than 16 bytes and TImode is 16-byte wide. */
6598 gcc_assert (mode != BLKmode);
6600 /* Vector integers are handled like floats according to
6601 the Sun VIS SDK. */
6602 mclass = MODE_FLOAT;
6606 switch (mclass)
6608 case MODE_FLOAT:
6609 case MODE_COMPLEX_FLOAT:
6610 case MODE_VECTOR_INT:
6611 if (TARGET_ARCH64 && TARGET_FPU && named)
6613 /* If all arg slots are filled, then must pass on stack. */
6614 if (slotno >= SPARC_FP_ARG_MAX)
6615 return -1;
6617 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6618 /* Arguments filling only one single FP register are
6619 right-justified in the outer double FP register. */
6620 if (GET_MODE_SIZE (mode) <= 4)
6621 regno++;
6622 break;
6624 /* fallthrough */
6626 case MODE_INT:
6627 case MODE_COMPLEX_INT:
6628 /* If all arg slots are filled, then must pass on stack. */
6629 if (slotno >= SPARC_INT_ARG_MAX)
6630 return -1;
6632 regno = regbase + slotno;
6633 break;
6635 case MODE_RANDOM:
6636 if (mode == VOIDmode)
6637 /* MODE is VOIDmode when generating the actual call. */
6638 return -1;
6640 gcc_assert (mode == BLKmode);
6642 if (TARGET_ARCH32
6643 || !type
6644 || (TREE_CODE (type) != RECORD_TYPE
6645 && TREE_CODE (type) != VECTOR_TYPE))
6647 /* If all arg slots are filled, then must pass on stack. */
6648 if (slotno >= SPARC_INT_ARG_MAX)
6649 return -1;
6651 regno = regbase + slotno;
6653 else /* TARGET_ARCH64 && type */
6655 /* If all arg slots are filled, then must pass on stack. */
6656 if (slotno >= SPARC_FP_ARG_MAX)
6657 return -1;
6659 if (TREE_CODE (type) == RECORD_TYPE)
6661 classify_data_t data = { false, false };
6662 traverse_record_type<classify_data_t, classify_registers>
6663 (type, named, &data);
6665 if (data.fp_regs)
6667 /* If all FP slots are filled except for the last one and
6668 there is no FP field in the first word, then must pass
6669 on stack. */
6670 if (slotno >= SPARC_FP_ARG_MAX - 1
6671 && !data.fp_regs_in_first_word)
6672 return -1;
6674 else
6676 /* If all int slots are filled, then must pass on stack. */
6677 if (slotno >= SPARC_INT_ARG_MAX)
6678 return -1;
6682 /* PREGNO isn't set since both int and FP regs can be used. */
6683 return slotno;
6685 break;
6687 default :
6688 gcc_unreachable ();
6691 *pregno = regno;
6692 return slotno;
6695 /* Handle recursive register counting/assigning for structure layout. */
6697 typedef struct
6699 int slotno; /* slot number of the argument. */
6700 int regbase; /* regno of the base register. */
6701 int intoffset; /* offset of the first pending integer field. */
6702 int nregs; /* number of words passed in registers. */
6703 bool stack; /* true if part of the argument is on the stack. */
6704 rtx ret; /* return expression being built. */
6705 } assign_data_t;
6707 /* A subroutine of function_arg_record_value. Compute the number of integer
6708 registers to be assigned between PARMS->intoffset and BITPOS. Return
6709 true if at least one integer register is assigned or false otherwise. */
6711 static bool
6712 compute_int_layout (HOST_WIDE_INT bitpos, assign_data_t *data, int *pnregs)
6714 if (data->intoffset < 0)
6715 return false;
6717 const int intoffset = data->intoffset;
6718 data->intoffset = -1;
6720 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6721 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
6722 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
6723 int nregs = (endbit - startbit) / BITS_PER_WORD;
6725 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
6727 nregs = SPARC_INT_ARG_MAX - this_slotno;
6729 /* We need to pass this field (partly) on the stack. */
6730 data->stack = 1;
6733 if (nregs <= 0)
6734 return false;
6736 *pnregs = nregs;
6737 return true;
6740 /* A subroutine of function_arg_record_value. Compute the number and the mode
6741 of the FP registers to be assigned for FIELD. Return true if at least one
6742 FP register is assigned or false otherwise. */
6744 static bool
6745 compute_fp_layout (const_tree field, HOST_WIDE_INT bitpos,
6746 assign_data_t *data,
6747 int *pnregs, machine_mode *pmode)
6749 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6750 machine_mode mode = DECL_MODE (field);
6751 int nregs, nslots;
6753 /* Slots are counted as words while regs are counted as having the size of
6754 the (inner) mode. */
6755 if (TREE_CODE (TREE_TYPE (field)) == VECTOR_TYPE && mode == BLKmode)
6757 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6758 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
6760 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
6762 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
6763 nregs = 2;
6765 else
6766 nregs = 1;
6768 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
6770 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
6772 nslots = SPARC_FP_ARG_MAX - this_slotno;
6773 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
6775 /* We need to pass this field (partly) on the stack. */
6776 data->stack = 1;
6778 if (nregs <= 0)
6779 return false;
6782 *pnregs = nregs;
6783 *pmode = mode;
6784 return true;
6787 /* A subroutine of function_arg_record_value. Count the number of registers
6788 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
6790 inline void
6791 count_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6792 assign_data_t *data)
6794 if (fp)
6796 int nregs;
6797 machine_mode mode;
6799 if (compute_int_layout (bitpos, data, &nregs))
6800 data->nregs += nregs;
6802 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
6803 data->nregs += nregs;
6805 else
6807 if (data->intoffset < 0)
6808 data->intoffset = bitpos;
6812 /* A subroutine of function_arg_record_value. Assign the bits of the
6813 structure between PARMS->intoffset and BITPOS to integer registers. */
6815 static void
6816 assign_int_registers (HOST_WIDE_INT bitpos, assign_data_t *data)
6818 int intoffset = data->intoffset;
6819 machine_mode mode;
6820 int nregs;
6822 if (!compute_int_layout (bitpos, data, &nregs))
6823 return;
6825 /* If this is the trailing part of a word, only load that much into
6826 the register. Otherwise load the whole register. Note that in
6827 the latter case we may pick up unwanted bits. It's not a problem
6828 at the moment but may wish to revisit. */
6829 if (intoffset % BITS_PER_WORD != 0)
6830 mode = smallest_int_mode_for_size (BITS_PER_WORD
6831 - intoffset % BITS_PER_WORD);
6832 else
6833 mode = word_mode;
6835 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
6836 unsigned int regno = data->regbase + this_slotno;
6837 intoffset /= BITS_PER_UNIT;
6841 rtx reg = gen_rtx_REG (mode, regno);
6842 XVECEXP (data->ret, 0, data->stack + data->nregs)
6843 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
6844 data->nregs += 1;
6845 mode = word_mode;
6846 regno += 1;
6847 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
6849 while (--nregs > 0);
6852 /* A subroutine of function_arg_record_value. Assign FIELD at position
6853 BITPOS to FP registers. */
6855 static void
6856 assign_fp_registers (const_tree field, HOST_WIDE_INT bitpos,
6857 assign_data_t *data)
6859 int nregs;
6860 machine_mode mode;
6862 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
6863 return;
6865 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
6866 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
6867 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
6868 regno++;
6869 int pos = bitpos / BITS_PER_UNIT;
6873 rtx reg = gen_rtx_REG (mode, regno);
6874 XVECEXP (data->ret, 0, data->stack + data->nregs)
6875 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
6876 data->nregs += 1;
6877 regno += GET_MODE_SIZE (mode) / 4;
6878 pos += GET_MODE_SIZE (mode);
6880 while (--nregs > 0);
6883 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
6884 the structure between PARMS->intoffset and BITPOS to registers. */
6886 inline void
6887 assign_registers (const_tree field, HOST_WIDE_INT bitpos, bool fp,
6888 assign_data_t *data)
6890 if (fp)
6892 assign_int_registers (bitpos, data);
6894 assign_fp_registers (field, bitpos, data);
6896 else
6898 if (data->intoffset < 0)
6899 data->intoffset = bitpos;
6903 /* Used by function_arg and sparc_function_value_1 to implement the complex
6904 conventions of the 64-bit ABI for passing and returning structures.
6905 Return an expression valid as a return value for the FUNCTION_ARG
6906 and TARGET_FUNCTION_VALUE.
6908 TYPE is the data type of the argument (as a tree).
6909 This is null for libcalls where that information may
6910 not be available.
6911 MODE is the argument's machine mode.
6912 SLOTNO is the index number of the argument's slot in the parameter array.
6913 NAMED is true if this argument is a named parameter
6914 (otherwise it is an extra parameter matching an ellipsis).
6915 REGBASE is the regno of the base register for the parameter array. */
6917 static rtx
6918 function_arg_record_value (const_tree type, machine_mode mode,
6919 int slotno, bool named, int regbase)
6921 HOST_WIDE_INT typesize = int_size_in_bytes (type);
6922 assign_data_t data;
6923 int nregs;
6925 data.slotno = slotno;
6926 data.regbase = regbase;
6928 /* Count how many registers we need. */
6929 data.nregs = 0;
6930 data.intoffset = 0;
6931 data.stack = false;
6932 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
6934 /* Take into account pending integer fields. */
6935 if (compute_int_layout (typesize * BITS_PER_UNIT, &data, &nregs))
6936 data.nregs += nregs;
6938 /* Allocate the vector and handle some annoying special cases. */
6939 nregs = data.nregs;
6941 if (nregs == 0)
6943 /* ??? Empty structure has no value? Duh? */
6944 if (typesize <= 0)
6946 /* Though there's nothing really to store, return a word register
6947 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
6948 leads to breakage due to the fact that there are zero bytes to
6949 load. */
6950 return gen_rtx_REG (mode, regbase);
6953 /* ??? C++ has structures with no fields, and yet a size. Give up
6954 for now and pass everything back in integer registers. */
6955 nregs = (typesize + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
6956 if (nregs + slotno > SPARC_INT_ARG_MAX)
6957 nregs = SPARC_INT_ARG_MAX - slotno;
6960 gcc_assert (nregs > 0);
6962 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
6964 /* If at least one field must be passed on the stack, generate
6965 (parallel [(expr_list (nil) ...) ...]) so that all fields will
6966 also be passed on the stack. We can't do much better because the
6967 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
6968 of structures for which the fields passed exclusively in registers
6969 are not at the beginning of the structure. */
6970 if (data.stack)
6971 XVECEXP (data.ret, 0, 0)
6972 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
6974 /* Assign the registers. */
6975 data.nregs = 0;
6976 data.intoffset = 0;
6977 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
6979 /* Assign pending integer fields. */
6980 assign_int_registers (typesize * BITS_PER_UNIT, &data);
6982 gcc_assert (data.nregs == nregs);
6984 return data.ret;
6987 /* Used by function_arg and sparc_function_value_1 to implement the conventions
6988 of the 64-bit ABI for passing and returning unions.
6989 Return an expression valid as a return value for the FUNCTION_ARG
6990 and TARGET_FUNCTION_VALUE.
6992 SIZE is the size in bytes of the union.
6993 MODE is the argument's machine mode.
6994 REGNO is the hard register the union will be passed in. */
6996 static rtx
6997 function_arg_union_value (int size, machine_mode mode, int slotno,
6998 int regno)
7000 int nwords = CEIL_NWORDS (size), i;
7001 rtx regs;
7003 /* See comment in previous function for empty structures. */
7004 if (nwords == 0)
7005 return gen_rtx_REG (mode, regno);
7007 if (slotno == SPARC_INT_ARG_MAX - 1)
7008 nwords = 1;
7010 regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7012 for (i = 0; i < nwords; i++)
7014 /* Unions are passed left-justified. */
7015 XVECEXP (regs, 0, i)
7016 = gen_rtx_EXPR_LIST (VOIDmode,
7017 gen_rtx_REG (word_mode, regno),
7018 GEN_INT (UNITS_PER_WORD * i));
7019 regno++;
7022 return regs;
7025 /* Used by function_arg and sparc_function_value_1 to implement the conventions
7026 for passing and returning BLKmode vectors.
7027 Return an expression valid as a return value for the FUNCTION_ARG
7028 and TARGET_FUNCTION_VALUE.
7030 SIZE is the size in bytes of the vector.
7031 REGNO is the FP hard register the vector will be passed in. */
7033 static rtx
7034 function_arg_vector_value (int size, int regno)
7036 const int nregs = MAX (1, size / 8);
7037 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nregs));
7039 if (size < 8)
7040 XVECEXP (regs, 0, 0)
7041 = gen_rtx_EXPR_LIST (VOIDmode,
7042 gen_rtx_REG (SImode, regno),
7043 const0_rtx);
7044 else
7045 for (int i = 0; i < nregs; i++)
7046 XVECEXP (regs, 0, i)
7047 = gen_rtx_EXPR_LIST (VOIDmode,
7048 gen_rtx_REG (DImode, regno + 2*i),
7049 GEN_INT (i*8));
7051 return regs;
7054 /* Determine where to put an argument to a function.
7055 Value is zero to push the argument on the stack,
7056 or a hard register in which to store the argument.
7058 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7059 the preceding args and about the function being called.
7060 MODE is the argument's machine mode.
7061 TYPE is the data type of the argument (as a tree).
7062 This is null for libcalls where that information may
7063 not be available.
7064 NAMED is true if this argument is a named parameter
7065 (otherwise it is an extra parameter matching an ellipsis).
7066 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7067 TARGET_FUNCTION_INCOMING_ARG. */
7069 static rtx
7070 sparc_function_arg_1 (cumulative_args_t cum_v, machine_mode mode,
7071 const_tree type, bool named, bool incoming)
7073 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7075 int regbase = (incoming
7076 ? SPARC_INCOMING_INT_ARG_FIRST
7077 : SPARC_OUTGOING_INT_ARG_FIRST);
7078 int slotno, regno, padding;
7079 enum mode_class mclass = GET_MODE_CLASS (mode);
7081 slotno = function_arg_slotno (cum, mode, type, named, incoming,
7082 &regno, &padding);
7083 if (slotno == -1)
7084 return 0;
7086 /* Vector types deserve special treatment because they are polymorphic wrt
7087 their mode, depending upon whether VIS instructions are enabled. */
7088 if (type && TREE_CODE (type) == VECTOR_TYPE)
7090 HOST_WIDE_INT size = int_size_in_bytes (type);
7091 gcc_assert ((TARGET_ARCH32 && size <= 8)
7092 || (TARGET_ARCH64 && size <= 16));
7094 if (mode == BLKmode)
7095 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST + 2*slotno);
7097 mclass = MODE_FLOAT;
7100 if (TARGET_ARCH32)
7101 return gen_rtx_REG (mode, regno);
7103 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7104 and are promoted to registers if possible. */
7105 if (type && TREE_CODE (type) == RECORD_TYPE)
7107 HOST_WIDE_INT size = int_size_in_bytes (type);
7108 gcc_assert (size <= 16);
7110 return function_arg_record_value (type, mode, slotno, named, regbase);
7113 /* Unions up to 16 bytes in size are passed in integer registers. */
7114 else if (type && TREE_CODE (type) == UNION_TYPE)
7116 HOST_WIDE_INT size = int_size_in_bytes (type);
7117 gcc_assert (size <= 16);
7119 return function_arg_union_value (size, mode, slotno, regno);
7122 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7123 but also have the slot allocated for them.
7124 If no prototype is in scope fp values in register slots get passed
7125 in two places, either fp regs and int regs or fp regs and memory. */
7126 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7127 && SPARC_FP_REG_P (regno))
7129 rtx reg = gen_rtx_REG (mode, regno);
7130 if (cum->prototype_p || cum->libcall_p)
7131 return reg;
7132 else
7134 rtx v0, v1;
7136 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7138 int intreg;
7140 /* On incoming, we don't need to know that the value
7141 is passed in %f0 and %i0, and it confuses other parts
7142 causing needless spillage even on the simplest cases. */
7143 if (incoming)
7144 return reg;
7146 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7147 + (regno - SPARC_FP_ARG_FIRST) / 2);
7149 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7150 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7151 const0_rtx);
7152 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7154 else
7156 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7157 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7158 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7163 /* All other aggregate types are passed in an integer register in a mode
7164 corresponding to the size of the type. */
7165 else if (type && AGGREGATE_TYPE_P (type))
7167 HOST_WIDE_INT size = int_size_in_bytes (type);
7168 gcc_assert (size <= 16);
7170 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7173 return gen_rtx_REG (mode, regno);
7176 /* Handle the TARGET_FUNCTION_ARG target hook. */
7178 static rtx
7179 sparc_function_arg (cumulative_args_t cum, machine_mode mode,
7180 const_tree type, bool named)
7182 return sparc_function_arg_1 (cum, mode, type, named, false);
7185 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7187 static rtx
7188 sparc_function_incoming_arg (cumulative_args_t cum, machine_mode mode,
7189 const_tree type, bool named)
7191 return sparc_function_arg_1 (cum, mode, type, named, true);
7194 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7196 static unsigned int
7197 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7199 return ((TARGET_ARCH64
7200 && (GET_MODE_ALIGNMENT (mode) == 128
7201 || (type && TYPE_ALIGN (type) == 128)))
7202 ? 128
7203 : PARM_BOUNDARY);
7206 /* For an arg passed partly in registers and partly in memory,
7207 this is the number of bytes of registers used.
7208 For args passed entirely in registers or entirely in memory, zero.
7210 Any arg that starts in the first 6 regs but won't entirely fit in them
7211 needs partial registers on v8. On v9, structures with integer
7212 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7213 values that begin in the last fp reg [where "last fp reg" varies with the
7214 mode] will be split between that reg and memory. */
7216 static int
7217 sparc_arg_partial_bytes (cumulative_args_t cum, machine_mode mode,
7218 tree type, bool named)
7220 int slotno, regno, padding;
7222 /* We pass false for incoming here, it doesn't matter. */
7223 slotno = function_arg_slotno (get_cumulative_args (cum), mode, type, named,
7224 false, &regno, &padding);
7226 if (slotno == -1)
7227 return 0;
7229 if (TARGET_ARCH32)
7231 if ((slotno + (mode == BLKmode
7232 ? CEIL_NWORDS (int_size_in_bytes (type))
7233 : CEIL_NWORDS (GET_MODE_SIZE (mode))))
7234 > SPARC_INT_ARG_MAX)
7235 return (SPARC_INT_ARG_MAX - slotno) * UNITS_PER_WORD;
7237 else
7239 /* We are guaranteed by pass_by_reference that the size of the
7240 argument is not greater than 16 bytes, so we only need to return
7241 one word if the argument is partially passed in registers. */
7243 if (type && AGGREGATE_TYPE_P (type))
7245 int size = int_size_in_bytes (type);
7247 if (size > UNITS_PER_WORD
7248 && (slotno == SPARC_INT_ARG_MAX - 1
7249 || slotno == SPARC_FP_ARG_MAX - 1))
7250 return UNITS_PER_WORD;
7252 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_INT
7253 || (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT
7254 && ! (TARGET_FPU && named)))
7256 /* The complex types are passed as packed types. */
7257 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD
7258 && slotno == SPARC_INT_ARG_MAX - 1)
7259 return UNITS_PER_WORD;
7261 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
7263 if ((slotno + GET_MODE_SIZE (mode) / UNITS_PER_WORD)
7264 > SPARC_FP_ARG_MAX)
7265 return UNITS_PER_WORD;
7269 return 0;
7272 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
7273 Specify whether to pass the argument by reference. */
7275 static bool
7276 sparc_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
7277 machine_mode mode, const_tree type,
7278 bool named ATTRIBUTE_UNUSED)
7280 if (TARGET_ARCH32)
7281 /* Original SPARC 32-bit ABI says that structures and unions,
7282 and quad-precision floats are passed by reference. For Pascal,
7283 also pass arrays by reference. All other base types are passed
7284 in registers.
7286 Extended ABI (as implemented by the Sun compiler) says that all
7287 complex floats are passed by reference. Pass complex integers
7288 in registers up to 8 bytes. More generally, enforce the 2-word
7289 cap for passing arguments in registers.
7291 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7292 integers are passed like floats of the same size, that is in
7293 registers up to 8 bytes. Pass all vector floats by reference
7294 like structure and unions. */
7295 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7296 || mode == SCmode
7297 /* Catch CDImode, TFmode, DCmode and TCmode. */
7298 || GET_MODE_SIZE (mode) > 8
7299 || (type
7300 && TREE_CODE (type) == VECTOR_TYPE
7301 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7302 else
7303 /* Original SPARC 64-bit ABI says that structures and unions
7304 smaller than 16 bytes are passed in registers, as well as
7305 all other base types.
7307 Extended ABI (as implemented by the Sun compiler) says that
7308 complex floats are passed in registers up to 16 bytes. Pass
7309 all complex integers in registers up to 16 bytes. More generally,
7310 enforce the 2-word cap for passing arguments in registers.
7312 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7313 integers are passed like floats of the same size, that is in
7314 registers (up to 16 bytes). Pass all vector floats like structure
7315 and unions. */
7316 return ((type
7317 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == VECTOR_TYPE)
7318 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
7319 /* Catch CTImode and TCmode. */
7320 || GET_MODE_SIZE (mode) > 16);
7323 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7324 Update the data in CUM to advance over an argument
7325 of mode MODE and data type TYPE.
7326 TYPE is null for libcalls where that information may not be available. */
7328 static void
7329 sparc_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
7330 const_tree type, bool named)
7332 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7333 int regno, padding;
7335 /* We pass false for incoming here, it doesn't matter. */
7336 function_arg_slotno (cum, mode, type, named, false, &regno, &padding);
7338 /* If argument requires leading padding, add it. */
7339 cum->words += padding;
7341 if (TARGET_ARCH32)
7342 cum->words += (mode == BLKmode
7343 ? CEIL_NWORDS (int_size_in_bytes (type))
7344 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7345 else
7347 if (type && AGGREGATE_TYPE_P (type))
7349 int size = int_size_in_bytes (type);
7351 if (size <= 8)
7352 ++cum->words;
7353 else if (size <= 16)
7354 cum->words += 2;
7355 else /* passed by reference */
7356 ++cum->words;
7358 else
7359 cum->words += (mode == BLKmode
7360 ? CEIL_NWORDS (int_size_in_bytes (type))
7361 : CEIL_NWORDS (GET_MODE_SIZE (mode)));
7365 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7366 are always stored left shifted in their argument slot. */
7368 static pad_direction
7369 sparc_function_arg_padding (machine_mode mode, const_tree type)
7371 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7372 return PAD_UPWARD;
7374 /* Fall back to the default. */
7375 return default_function_arg_padding (mode, type);
7378 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7379 Specify whether to return the return value in memory. */
7381 static bool
7382 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7384 if (TARGET_ARCH32)
7385 /* Original SPARC 32-bit ABI says that structures and unions,
7386 and quad-precision floats are returned in memory. All other
7387 base types are returned in registers.
7389 Extended ABI (as implemented by the Sun compiler) says that
7390 all complex floats are returned in registers (8 FP registers
7391 at most for '_Complex long double'). Return all complex integers
7392 in registers (4 at most for '_Complex long long').
7394 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7395 integers are returned like floats of the same size, that is in
7396 registers up to 8 bytes and in memory otherwise. Return all
7397 vector floats in memory like structure and unions; note that
7398 they always have BLKmode like the latter. */
7399 return (TYPE_MODE (type) == BLKmode
7400 || TYPE_MODE (type) == TFmode
7401 || (TREE_CODE (type) == VECTOR_TYPE
7402 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7403 else
7404 /* Original SPARC 64-bit ABI says that structures and unions
7405 smaller than 32 bytes are returned in registers, as well as
7406 all other base types.
7408 Extended ABI (as implemented by the Sun compiler) says that all
7409 complex floats are returned in registers (8 FP registers at most
7410 for '_Complex long double'). Return all complex integers in
7411 registers (4 at most for '_Complex TItype').
7413 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7414 integers are returned like floats of the same size, that is in
7415 registers. Return all vector floats like structure and unions;
7416 note that they always have BLKmode like the latter. */
7417 return (TYPE_MODE (type) == BLKmode
7418 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7421 /* Handle the TARGET_STRUCT_VALUE target hook.
7422 Return where to find the structure return value address. */
7424 static rtx
7425 sparc_struct_value_rtx (tree fndecl, int incoming)
7427 if (TARGET_ARCH64)
7428 return 0;
7429 else
7431 rtx mem;
7433 if (incoming)
7434 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7435 STRUCT_VALUE_OFFSET));
7436 else
7437 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7438 STRUCT_VALUE_OFFSET));
7440 /* Only follow the SPARC ABI for fixed-size structure returns.
7441 Variable size structure returns are handled per the normal
7442 procedures in GCC. This is enabled by -mstd-struct-return */
7443 if (incoming == 2
7444 && sparc_std_struct_return
7445 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7446 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7448 /* We must check and adjust the return address, as it is optional
7449 as to whether the return object is really provided. */
7450 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7451 rtx scratch = gen_reg_rtx (SImode);
7452 rtx_code_label *endlab = gen_label_rtx ();
7454 /* Calculate the return object size. */
7455 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7456 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7457 /* Construct a temporary return value. */
7458 rtx temp_val
7459 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7461 /* Implement SPARC 32-bit psABI callee return struct checking:
7463 Fetch the instruction where we will return to and see if
7464 it's an unimp instruction (the most significant 10 bits
7465 will be zero). */
7466 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7467 plus_constant (Pmode,
7468 ret_reg, 8)));
7469 /* Assume the size is valid and pre-adjust. */
7470 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7471 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7472 0, endlab);
7473 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7474 /* Write the address of the memory pointed to by temp_val into
7475 the memory pointed to by mem. */
7476 emit_move_insn (mem, XEXP (temp_val, 0));
7477 emit_label (endlab);
7480 return mem;
7484 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7485 For v9, function return values are subject to the same rules as arguments,
7486 except that up to 32 bytes may be returned in registers. */
7488 static rtx
7489 sparc_function_value_1 (const_tree type, machine_mode mode,
7490 bool outgoing)
7492 /* Beware that the two values are swapped here wrt function_arg. */
7493 int regbase = (outgoing
7494 ? SPARC_INCOMING_INT_ARG_FIRST
7495 : SPARC_OUTGOING_INT_ARG_FIRST);
7496 enum mode_class mclass = GET_MODE_CLASS (mode);
7497 int regno;
7499 /* Vector types deserve special treatment because they are polymorphic wrt
7500 their mode, depending upon whether VIS instructions are enabled. */
7501 if (type && TREE_CODE (type) == VECTOR_TYPE)
7503 HOST_WIDE_INT size = int_size_in_bytes (type);
7504 gcc_assert ((TARGET_ARCH32 && size <= 8)
7505 || (TARGET_ARCH64 && size <= 32));
7507 if (mode == BLKmode)
7508 return function_arg_vector_value (size, SPARC_FP_ARG_FIRST);
7510 mclass = MODE_FLOAT;
7513 if (TARGET_ARCH64 && type)
7515 /* Structures up to 32 bytes in size are returned in registers. */
7516 if (TREE_CODE (type) == RECORD_TYPE)
7518 HOST_WIDE_INT size = int_size_in_bytes (type);
7519 gcc_assert (size <= 32);
7521 return function_arg_record_value (type, mode, 0, 1, regbase);
7524 /* Unions up to 32 bytes in size are returned in integer registers. */
7525 else if (TREE_CODE (type) == UNION_TYPE)
7527 HOST_WIDE_INT size = int_size_in_bytes (type);
7528 gcc_assert (size <= 32);
7530 return function_arg_union_value (size, mode, 0, regbase);
7533 /* Objects that require it are returned in FP registers. */
7534 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7537 /* All other aggregate types are returned in an integer register in a
7538 mode corresponding to the size of the type. */
7539 else if (AGGREGATE_TYPE_P (type))
7541 /* All other aggregate types are passed in an integer register
7542 in a mode corresponding to the size of the type. */
7543 HOST_WIDE_INT size = int_size_in_bytes (type);
7544 gcc_assert (size <= 32);
7546 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7548 /* ??? We probably should have made the same ABI change in
7549 3.4.0 as the one we made for unions. The latter was
7550 required by the SCD though, while the former is not
7551 specified, so we favored compatibility and efficiency.
7553 Now we're stuck for aggregates larger than 16 bytes,
7554 because OImode vanished in the meantime. Let's not
7555 try to be unduly clever, and simply follow the ABI
7556 for unions in that case. */
7557 if (mode == BLKmode)
7558 return function_arg_union_value (size, mode, 0, regbase);
7559 else
7560 mclass = MODE_INT;
7563 /* We should only have pointer and integer types at this point. This
7564 must match sparc_promote_function_mode. */
7565 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7566 mode = word_mode;
7569 /* We should only have pointer and integer types at this point, except with
7570 -freg-struct-return. This must match sparc_promote_function_mode. */
7571 else if (TARGET_ARCH32
7572 && !(type && AGGREGATE_TYPE_P (type))
7573 && mclass == MODE_INT
7574 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7575 mode = word_mode;
7577 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7578 regno = SPARC_FP_ARG_FIRST;
7579 else
7580 regno = regbase;
7582 return gen_rtx_REG (mode, regno);
7585 /* Handle TARGET_FUNCTION_VALUE.
7586 On the SPARC, the value is found in the first "output" register, but the
7587 called function leaves it in the first "input" register. */
7589 static rtx
7590 sparc_function_value (const_tree valtype,
7591 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7592 bool outgoing)
7594 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7597 /* Handle TARGET_LIBCALL_VALUE. */
7599 static rtx
7600 sparc_libcall_value (machine_mode mode,
7601 const_rtx fun ATTRIBUTE_UNUSED)
7603 return sparc_function_value_1 (NULL_TREE, mode, false);
7606 /* Handle FUNCTION_VALUE_REGNO_P.
7607 On the SPARC, the first "output" reg is used for integer values, and the
7608 first floating point register is used for floating point values. */
7610 static bool
7611 sparc_function_value_regno_p (const unsigned int regno)
7613 return (regno == 8 || (TARGET_FPU && regno == 32));
7616 /* Do what is necessary for `va_start'. We look at the current function
7617 to determine if stdarg or varargs is used and return the address of
7618 the first unnamed parameter. */
7620 static rtx
7621 sparc_builtin_saveregs (void)
7623 int first_reg = crtl->args.info.words;
7624 rtx address;
7625 int regno;
7627 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7628 emit_move_insn (gen_rtx_MEM (word_mode,
7629 gen_rtx_PLUS (Pmode,
7630 frame_pointer_rtx,
7631 GEN_INT (FIRST_PARM_OFFSET (0)
7632 + (UNITS_PER_WORD
7633 * regno)))),
7634 gen_rtx_REG (word_mode,
7635 SPARC_INCOMING_INT_ARG_FIRST + regno));
7637 address = gen_rtx_PLUS (Pmode,
7638 frame_pointer_rtx,
7639 GEN_INT (FIRST_PARM_OFFSET (0)
7640 + UNITS_PER_WORD * first_reg));
7642 return address;
7645 /* Implement `va_start' for stdarg. */
7647 static void
7648 sparc_va_start (tree valist, rtx nextarg)
7650 nextarg = expand_builtin_saveregs ();
7651 std_expand_builtin_va_start (valist, nextarg);
7654 /* Implement `va_arg' for stdarg. */
7656 static tree
7657 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7658 gimple_seq *post_p)
7660 HOST_WIDE_INT size, rsize, align;
7661 tree addr, incr;
7662 bool indirect;
7663 tree ptrtype = build_pointer_type (type);
7665 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
7667 indirect = true;
7668 size = rsize = UNITS_PER_WORD;
7669 align = 0;
7671 else
7673 indirect = false;
7674 size = int_size_in_bytes (type);
7675 rsize = ROUND_UP (size, UNITS_PER_WORD);
7676 align = 0;
7678 if (TARGET_ARCH64)
7680 /* For SPARC64, objects requiring 16-byte alignment get it. */
7681 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7682 align = 2 * UNITS_PER_WORD;
7684 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7685 are left-justified in their slots. */
7686 if (AGGREGATE_TYPE_P (type))
7688 if (size == 0)
7689 size = rsize = UNITS_PER_WORD;
7690 else
7691 size = rsize;
7696 incr = valist;
7697 if (align)
7699 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7700 incr = fold_convert (sizetype, incr);
7701 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7702 size_int (-align));
7703 incr = fold_convert (ptr_type_node, incr);
7706 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7707 addr = incr;
7709 if (BYTES_BIG_ENDIAN && size < rsize)
7710 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7712 if (indirect)
7714 addr = fold_convert (build_pointer_type (ptrtype), addr);
7715 addr = build_va_arg_indirect_ref (addr);
7718 /* If the address isn't aligned properly for the type, we need a temporary.
7719 FIXME: This is inefficient, usually we can do this in registers. */
7720 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
7722 tree tmp = create_tmp_var (type, "va_arg_tmp");
7723 tree dest_addr = build_fold_addr_expr (tmp);
7724 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
7725 3, dest_addr, addr, size_int (rsize));
7726 TREE_ADDRESSABLE (tmp) = 1;
7727 gimplify_and_add (copy, pre_p);
7728 addr = dest_addr;
7731 else
7732 addr = fold_convert (ptrtype, addr);
7734 incr = fold_build_pointer_plus_hwi (incr, rsize);
7735 gimplify_assign (valist, incr, post_p);
7737 return build_va_arg_indirect_ref (addr);
7740 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
7741 Specify whether the vector mode is supported by the hardware. */
7743 static bool
7744 sparc_vector_mode_supported_p (machine_mode mode)
7746 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
7749 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
7751 static machine_mode
7752 sparc_preferred_simd_mode (scalar_mode mode)
7754 if (TARGET_VIS)
7755 switch (mode)
7757 case E_SImode:
7758 return V2SImode;
7759 case E_HImode:
7760 return V4HImode;
7761 case E_QImode:
7762 return V8QImode;
7764 default:;
7767 return word_mode;
7770 /* Return the string to output an unconditional branch to LABEL, which is
7771 the operand number of the label.
7773 DEST is the destination insn (i.e. the label), INSN is the source. */
7775 const char *
7776 output_ubranch (rtx dest, rtx_insn *insn)
7778 static char string[64];
7779 bool v9_form = false;
7780 int delta;
7781 char *p;
7783 /* Even if we are trying to use cbcond for this, evaluate
7784 whether we can use V9 branches as our backup plan. */
7786 delta = 5000000;
7787 if (INSN_ADDRESSES_SET_P ())
7788 delta = (INSN_ADDRESSES (INSN_UID (dest))
7789 - INSN_ADDRESSES (INSN_UID (insn)));
7791 /* Leave some instructions for "slop". */
7792 if (TARGET_V9 && delta >= -260000 && delta < 260000)
7793 v9_form = true;
7795 if (TARGET_CBCOND)
7797 bool emit_nop = emit_cbcond_nop (insn);
7798 bool far = false;
7799 const char *rval;
7801 if (delta < -500 || delta > 500)
7802 far = true;
7804 if (far)
7806 if (v9_form)
7807 rval = "ba,a,pt\t%%xcc, %l0";
7808 else
7809 rval = "b,a\t%l0";
7811 else
7813 if (emit_nop)
7814 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
7815 else
7816 rval = "cwbe\t%%g0, %%g0, %l0";
7818 return rval;
7821 if (v9_form)
7822 strcpy (string, "ba%*,pt\t%%xcc, ");
7823 else
7824 strcpy (string, "b%*\t");
7826 p = strchr (string, '\0');
7827 *p++ = '%';
7828 *p++ = 'l';
7829 *p++ = '0';
7830 *p++ = '%';
7831 *p++ = '(';
7832 *p = '\0';
7834 return string;
7837 /* Return the string to output a conditional branch to LABEL, which is
7838 the operand number of the label. OP is the conditional expression.
7839 XEXP (OP, 0) is assumed to be a condition code register (integer or
7840 floating point) and its mode specifies what kind of comparison we made.
7842 DEST is the destination insn (i.e. the label), INSN is the source.
7844 REVERSED is nonzero if we should reverse the sense of the comparison.
7846 ANNUL is nonzero if we should generate an annulling branch. */
7848 const char *
7849 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
7850 rtx_insn *insn)
7852 static char string[64];
7853 enum rtx_code code = GET_CODE (op);
7854 rtx cc_reg = XEXP (op, 0);
7855 machine_mode mode = GET_MODE (cc_reg);
7856 const char *labelno, *branch;
7857 int spaces = 8, far;
7858 char *p;
7860 /* v9 branches are limited to +-1MB. If it is too far away,
7861 change
7863 bne,pt %xcc, .LC30
7867 be,pn %xcc, .+12
7869 ba .LC30
7873 fbne,a,pn %fcc2, .LC29
7877 fbe,pt %fcc2, .+16
7879 ba .LC29 */
7881 far = TARGET_V9 && (get_attr_length (insn) >= 3);
7882 if (reversed ^ far)
7884 /* Reversal of FP compares takes care -- an ordered compare
7885 becomes an unordered compare and vice versa. */
7886 if (mode == CCFPmode || mode == CCFPEmode)
7887 code = reverse_condition_maybe_unordered (code);
7888 else
7889 code = reverse_condition (code);
7892 /* Start by writing the branch condition. */
7893 if (mode == CCFPmode || mode == CCFPEmode)
7895 switch (code)
7897 case NE:
7898 branch = "fbne";
7899 break;
7900 case EQ:
7901 branch = "fbe";
7902 break;
7903 case GE:
7904 branch = "fbge";
7905 break;
7906 case GT:
7907 branch = "fbg";
7908 break;
7909 case LE:
7910 branch = "fble";
7911 break;
7912 case LT:
7913 branch = "fbl";
7914 break;
7915 case UNORDERED:
7916 branch = "fbu";
7917 break;
7918 case ORDERED:
7919 branch = "fbo";
7920 break;
7921 case UNGT:
7922 branch = "fbug";
7923 break;
7924 case UNLT:
7925 branch = "fbul";
7926 break;
7927 case UNEQ:
7928 branch = "fbue";
7929 break;
7930 case UNGE:
7931 branch = "fbuge";
7932 break;
7933 case UNLE:
7934 branch = "fbule";
7935 break;
7936 case LTGT:
7937 branch = "fblg";
7938 break;
7939 default:
7940 gcc_unreachable ();
7943 /* ??? !v9: FP branches cannot be preceded by another floating point
7944 insn. Because there is currently no concept of pre-delay slots,
7945 we can fix this only by always emitting a nop before a floating
7946 point branch. */
7948 string[0] = '\0';
7949 if (! TARGET_V9)
7950 strcpy (string, "nop\n\t");
7951 strcat (string, branch);
7953 else
7955 switch (code)
7957 case NE:
7958 if (mode == CCVmode || mode == CCXVmode)
7959 branch = "bvs";
7960 else
7961 branch = "bne";
7962 break;
7963 case EQ:
7964 if (mode == CCVmode || mode == CCXVmode)
7965 branch = "bvc";
7966 else
7967 branch = "be";
7968 break;
7969 case GE:
7970 if (mode == CCNZmode || mode == CCXNZmode)
7971 branch = "bpos";
7972 else
7973 branch = "bge";
7974 break;
7975 case GT:
7976 branch = "bg";
7977 break;
7978 case LE:
7979 branch = "ble";
7980 break;
7981 case LT:
7982 if (mode == CCNZmode || mode == CCXNZmode)
7983 branch = "bneg";
7984 else
7985 branch = "bl";
7986 break;
7987 case GEU:
7988 branch = "bgeu";
7989 break;
7990 case GTU:
7991 branch = "bgu";
7992 break;
7993 case LEU:
7994 branch = "bleu";
7995 break;
7996 case LTU:
7997 branch = "blu";
7998 break;
7999 default:
8000 gcc_unreachable ();
8002 strcpy (string, branch);
8004 spaces -= strlen (branch);
8005 p = strchr (string, '\0');
8007 /* Now add the annulling, the label, and a possible noop. */
8008 if (annul && ! far)
8010 strcpy (p, ",a");
8011 p += 2;
8012 spaces -= 2;
8015 if (TARGET_V9)
8017 rtx note;
8018 int v8 = 0;
8020 if (! far && insn && INSN_ADDRESSES_SET_P ())
8022 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8023 - INSN_ADDRESSES (INSN_UID (insn)));
8024 /* Leave some instructions for "slop". */
8025 if (delta < -260000 || delta >= 260000)
8026 v8 = 1;
8029 switch (mode)
8031 case E_CCmode:
8032 case E_CCNZmode:
8033 case E_CCCmode:
8034 case E_CCVmode:
8035 labelno = "%%icc, ";
8036 if (v8)
8037 labelno = "";
8038 break;
8039 case E_CCXmode:
8040 case E_CCXNZmode:
8041 case E_CCXCmode:
8042 case E_CCXVmode:
8043 labelno = "%%xcc, ";
8044 gcc_assert (!v8);
8045 break;
8046 case E_CCFPmode:
8047 case E_CCFPEmode:
8049 static char v9_fcc_labelno[] = "%%fccX, ";
8050 /* Set the char indicating the number of the fcc reg to use. */
8051 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8052 labelno = v9_fcc_labelno;
8053 if (v8)
8055 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8056 labelno = "";
8059 break;
8060 default:
8061 gcc_unreachable ();
8064 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8066 strcpy (p,
8067 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8068 >= profile_probability::even ()) ^ far)
8069 ? ",pt" : ",pn");
8070 p += 3;
8071 spaces -= 3;
8074 else
8075 labelno = "";
8077 if (spaces > 0)
8078 *p++ = '\t';
8079 else
8080 *p++ = ' ';
8081 strcpy (p, labelno);
8082 p = strchr (p, '\0');
8083 if (far)
8085 strcpy (p, ".+12\n\t nop\n\tb\t");
8086 /* Skip the next insn if requested or
8087 if we know that it will be a nop. */
8088 if (annul || ! final_sequence)
8089 p[3] = '6';
8090 p += 14;
8092 *p++ = '%';
8093 *p++ = 'l';
8094 *p++ = label + '0';
8095 *p++ = '%';
8096 *p++ = '#';
8097 *p = '\0';
8099 return string;
8102 /* Emit a library call comparison between floating point X and Y.
8103 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8104 Return the new operator to be used in the comparison sequence.
8106 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8107 values as arguments instead of the TFmode registers themselves,
8108 that's why we cannot call emit_float_lib_cmp. */
8111 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8113 const char *qpfunc;
8114 rtx slot0, slot1, result, tem, tem2, libfunc;
8115 machine_mode mode;
8116 enum rtx_code new_comparison;
8118 switch (comparison)
8120 case EQ:
8121 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8122 break;
8124 case NE:
8125 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8126 break;
8128 case GT:
8129 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8130 break;
8132 case GE:
8133 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8134 break;
8136 case LT:
8137 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8138 break;
8140 case LE:
8141 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8142 break;
8144 case ORDERED:
8145 case UNORDERED:
8146 case UNGT:
8147 case UNLT:
8148 case UNEQ:
8149 case UNGE:
8150 case UNLE:
8151 case LTGT:
8152 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8153 break;
8155 default:
8156 gcc_unreachable ();
8159 if (TARGET_ARCH64)
8161 if (MEM_P (x))
8163 tree expr = MEM_EXPR (x);
8164 if (expr)
8165 mark_addressable (expr);
8166 slot0 = x;
8168 else
8170 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8171 emit_move_insn (slot0, x);
8174 if (MEM_P (y))
8176 tree expr = MEM_EXPR (y);
8177 if (expr)
8178 mark_addressable (expr);
8179 slot1 = y;
8181 else
8183 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8184 emit_move_insn (slot1, y);
8187 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8188 emit_library_call (libfunc, LCT_NORMAL,
8189 DImode,
8190 XEXP (slot0, 0), Pmode,
8191 XEXP (slot1, 0), Pmode);
8192 mode = DImode;
8194 else
8196 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8197 emit_library_call (libfunc, LCT_NORMAL,
8198 SImode,
8199 x, TFmode, y, TFmode);
8200 mode = SImode;
8204 /* Immediately move the result of the libcall into a pseudo
8205 register so reload doesn't clobber the value if it needs
8206 the return register for a spill reg. */
8207 result = gen_reg_rtx (mode);
8208 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8210 switch (comparison)
8212 default:
8213 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8214 case ORDERED:
8215 case UNORDERED:
8216 new_comparison = (comparison == UNORDERED ? EQ : NE);
8217 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8218 case UNGT:
8219 case UNGE:
8220 new_comparison = (comparison == UNGT ? GT : NE);
8221 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8222 case UNLE:
8223 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8224 case UNLT:
8225 tem = gen_reg_rtx (mode);
8226 if (TARGET_ARCH32)
8227 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8228 else
8229 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8230 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8231 case UNEQ:
8232 case LTGT:
8233 tem = gen_reg_rtx (mode);
8234 if (TARGET_ARCH32)
8235 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8236 else
8237 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8238 tem2 = gen_reg_rtx (mode);
8239 if (TARGET_ARCH32)
8240 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8241 else
8242 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8243 new_comparison = (comparison == UNEQ ? EQ : NE);
8244 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8247 gcc_unreachable ();
8250 /* Generate an unsigned DImode to FP conversion. This is the same code
8251 optabs would emit if we didn't have TFmode patterns. */
8253 void
8254 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8256 rtx i0, i1, f0, in, out;
8258 out = operands[0];
8259 in = force_reg (DImode, operands[1]);
8260 rtx_code_label *neglab = gen_label_rtx ();
8261 rtx_code_label *donelab = gen_label_rtx ();
8262 i0 = gen_reg_rtx (DImode);
8263 i1 = gen_reg_rtx (DImode);
8264 f0 = gen_reg_rtx (mode);
8266 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8268 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8269 emit_jump_insn (gen_jump (donelab));
8270 emit_barrier ();
8272 emit_label (neglab);
8274 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8275 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8276 emit_insn (gen_iordi3 (i0, i0, i1));
8277 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8278 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8280 emit_label (donelab);
8283 /* Generate an FP to unsigned DImode conversion. This is the same code
8284 optabs would emit if we didn't have TFmode patterns. */
8286 void
8287 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8289 rtx i0, i1, f0, in, out, limit;
8291 out = operands[0];
8292 in = force_reg (mode, operands[1]);
8293 rtx_code_label *neglab = gen_label_rtx ();
8294 rtx_code_label *donelab = gen_label_rtx ();
8295 i0 = gen_reg_rtx (DImode);
8296 i1 = gen_reg_rtx (DImode);
8297 limit = gen_reg_rtx (mode);
8298 f0 = gen_reg_rtx (mode);
8300 emit_move_insn (limit,
8301 const_double_from_real_value (
8302 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8303 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8305 emit_insn (gen_rtx_SET (out,
8306 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8307 emit_jump_insn (gen_jump (donelab));
8308 emit_barrier ();
8310 emit_label (neglab);
8312 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8313 emit_insn (gen_rtx_SET (i0,
8314 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8315 emit_insn (gen_movdi (i1, const1_rtx));
8316 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8317 emit_insn (gen_xordi3 (out, i0, i1));
8319 emit_label (donelab);
8322 /* Return the string to output a compare and branch instruction to DEST.
8323 DEST is the destination insn (i.e. the label), INSN is the source,
8324 and OP is the conditional expression. */
8326 const char *
8327 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8329 machine_mode mode = GET_MODE (XEXP (op, 0));
8330 enum rtx_code code = GET_CODE (op);
8331 const char *cond_str, *tmpl;
8332 int far, emit_nop, len;
8333 static char string[64];
8334 char size_char;
8336 /* Compare and Branch is limited to +-2KB. If it is too far away,
8337 change
8339 cxbne X, Y, .LC30
8343 cxbe X, Y, .+16
8345 ba,pt xcc, .LC30
8346 nop */
8348 len = get_attr_length (insn);
8350 far = len == 4;
8351 emit_nop = len == 2;
8353 if (far)
8354 code = reverse_condition (code);
8356 size_char = ((mode == SImode) ? 'w' : 'x');
8358 switch (code)
8360 case NE:
8361 cond_str = "ne";
8362 break;
8364 case EQ:
8365 cond_str = "e";
8366 break;
8368 case GE:
8369 cond_str = "ge";
8370 break;
8372 case GT:
8373 cond_str = "g";
8374 break;
8376 case LE:
8377 cond_str = "le";
8378 break;
8380 case LT:
8381 cond_str = "l";
8382 break;
8384 case GEU:
8385 cond_str = "cc";
8386 break;
8388 case GTU:
8389 cond_str = "gu";
8390 break;
8392 case LEU:
8393 cond_str = "leu";
8394 break;
8396 case LTU:
8397 cond_str = "cs";
8398 break;
8400 default:
8401 gcc_unreachable ();
8404 if (far)
8406 int veryfar = 1, delta;
8408 if (INSN_ADDRESSES_SET_P ())
8410 delta = (INSN_ADDRESSES (INSN_UID (dest))
8411 - INSN_ADDRESSES (INSN_UID (insn)));
8412 /* Leave some instructions for "slop". */
8413 if (delta >= -260000 && delta < 260000)
8414 veryfar = 0;
8417 if (veryfar)
8418 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8419 else
8420 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8422 else
8424 if (emit_nop)
8425 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8426 else
8427 tmpl = "c%cb%s\t%%1, %%2, %%3";
8430 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8432 return string;
8435 /* Return the string to output a conditional branch to LABEL, testing
8436 register REG. LABEL is the operand number of the label; REG is the
8437 operand number of the reg. OP is the conditional expression. The mode
8438 of REG says what kind of comparison we made.
8440 DEST is the destination insn (i.e. the label), INSN is the source.
8442 REVERSED is nonzero if we should reverse the sense of the comparison.
8444 ANNUL is nonzero if we should generate an annulling branch. */
8446 const char *
8447 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8448 int annul, rtx_insn *insn)
8450 static char string[64];
8451 enum rtx_code code = GET_CODE (op);
8452 machine_mode mode = GET_MODE (XEXP (op, 0));
8453 rtx note;
8454 int far;
8455 char *p;
8457 /* branch on register are limited to +-128KB. If it is too far away,
8458 change
8460 brnz,pt %g1, .LC30
8464 brz,pn %g1, .+12
8466 ba,pt %xcc, .LC30
8470 brgez,a,pn %o1, .LC29
8474 brlz,pt %o1, .+16
8476 ba,pt %xcc, .LC29 */
8478 far = get_attr_length (insn) >= 3;
8480 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8481 if (reversed ^ far)
8482 code = reverse_condition (code);
8484 /* Only 64-bit versions of these instructions exist. */
8485 gcc_assert (mode == DImode);
8487 /* Start by writing the branch condition. */
8489 switch (code)
8491 case NE:
8492 strcpy (string, "brnz");
8493 break;
8495 case EQ:
8496 strcpy (string, "brz");
8497 break;
8499 case GE:
8500 strcpy (string, "brgez");
8501 break;
8503 case LT:
8504 strcpy (string, "brlz");
8505 break;
8507 case LE:
8508 strcpy (string, "brlez");
8509 break;
8511 case GT:
8512 strcpy (string, "brgz");
8513 break;
8515 default:
8516 gcc_unreachable ();
8519 p = strchr (string, '\0');
8521 /* Now add the annulling, reg, label, and nop. */
8522 if (annul && ! far)
8524 strcpy (p, ",a");
8525 p += 2;
8528 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8530 strcpy (p,
8531 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8532 >= profile_probability::even ()) ^ far)
8533 ? ",pt" : ",pn");
8534 p += 3;
8537 *p = p < string + 8 ? '\t' : ' ';
8538 p++;
8539 *p++ = '%';
8540 *p++ = '0' + reg;
8541 *p++ = ',';
8542 *p++ = ' ';
8543 if (far)
8545 int veryfar = 1, delta;
8547 if (INSN_ADDRESSES_SET_P ())
8549 delta = (INSN_ADDRESSES (INSN_UID (dest))
8550 - INSN_ADDRESSES (INSN_UID (insn)));
8551 /* Leave some instructions for "slop". */
8552 if (delta >= -260000 && delta < 260000)
8553 veryfar = 0;
8556 strcpy (p, ".+12\n\t nop\n\t");
8557 /* Skip the next insn if requested or
8558 if we know that it will be a nop. */
8559 if (annul || ! final_sequence)
8560 p[3] = '6';
8561 p += 12;
8562 if (veryfar)
8564 strcpy (p, "b\t");
8565 p += 2;
8567 else
8569 strcpy (p, "ba,pt\t%%xcc, ");
8570 p += 13;
8573 *p++ = '%';
8574 *p++ = 'l';
8575 *p++ = '0' + label;
8576 *p++ = '%';
8577 *p++ = '#';
8578 *p = '\0';
8580 return string;
8583 /* Return 1, if any of the registers of the instruction are %l[0-7] or %o[0-7].
8584 Such instructions cannot be used in the delay slot of return insn on v9.
8585 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8588 static int
8589 epilogue_renumber (register rtx *where, int test)
8591 register const char *fmt;
8592 register int i;
8593 register enum rtx_code code;
8595 if (*where == 0)
8596 return 0;
8598 code = GET_CODE (*where);
8600 switch (code)
8602 case REG:
8603 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8604 return 1;
8605 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8606 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8607 /* fallthrough */
8608 case SCRATCH:
8609 case CC0:
8610 case PC:
8611 case CONST_INT:
8612 case CONST_WIDE_INT:
8613 case CONST_DOUBLE:
8614 return 0;
8616 /* Do not replace the frame pointer with the stack pointer because
8617 it can cause the delayed instruction to load below the stack.
8618 This occurs when instructions like:
8620 (set (reg/i:SI 24 %i0)
8621 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8622 (const_int -20 [0xffffffec])) 0))
8624 are in the return delayed slot. */
8625 case PLUS:
8626 if (GET_CODE (XEXP (*where, 0)) == REG
8627 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8628 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8629 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8630 return 1;
8631 break;
8633 case MEM:
8634 if (SPARC_STACK_BIAS
8635 && GET_CODE (XEXP (*where, 0)) == REG
8636 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8637 return 1;
8638 break;
8640 default:
8641 break;
8644 fmt = GET_RTX_FORMAT (code);
8646 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8648 if (fmt[i] == 'E')
8650 register int j;
8651 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8652 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8653 return 1;
8655 else if (fmt[i] == 'e'
8656 && epilogue_renumber (&(XEXP (*where, i)), test))
8657 return 1;
8659 return 0;
8662 /* Leaf functions and non-leaf functions have different needs. */
8664 static const int
8665 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8667 static const int
8668 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8670 static const int *const reg_alloc_orders[] = {
8671 reg_leaf_alloc_order,
8672 reg_nonleaf_alloc_order};
8674 void
8675 order_regs_for_local_alloc (void)
8677 static int last_order_nonleaf = 1;
8679 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8681 last_order_nonleaf = !last_order_nonleaf;
8682 memcpy ((char *) reg_alloc_order,
8683 (const char *) reg_alloc_orders[last_order_nonleaf],
8684 FIRST_PSEUDO_REGISTER * sizeof (int));
8688 /* Return 1 if REG and MEM are legitimate enough to allow the various
8689 MEM<-->REG splits to be run. */
8692 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8694 /* Punt if we are here by mistake. */
8695 gcc_assert (reload_completed);
8697 /* We must have an offsettable memory reference. */
8698 if (!offsettable_memref_p (mem))
8699 return 0;
8701 /* If we have legitimate args for ldd/std, we do not want
8702 the split to happen. */
8703 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
8704 return 0;
8706 /* Success. */
8707 return 1;
8710 /* Split a REG <-- MEM move into a pair of moves in MODE. */
8712 void
8713 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
8715 rtx high_part = gen_highpart (mode, dest);
8716 rtx low_part = gen_lowpart (mode, dest);
8717 rtx word0 = adjust_address (src, mode, 0);
8718 rtx word1 = adjust_address (src, mode, 4);
8720 if (reg_overlap_mentioned_p (high_part, word1))
8722 emit_move_insn_1 (low_part, word1);
8723 emit_move_insn_1 (high_part, word0);
8725 else
8727 emit_move_insn_1 (high_part, word0);
8728 emit_move_insn_1 (low_part, word1);
8732 /* Split a MEM <-- REG move into a pair of moves in MODE. */
8734 void
8735 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
8737 rtx word0 = adjust_address (dest, mode, 0);
8738 rtx word1 = adjust_address (dest, mode, 4);
8739 rtx high_part = gen_highpart (mode, src);
8740 rtx low_part = gen_lowpart (mode, src);
8742 emit_move_insn_1 (word0, high_part);
8743 emit_move_insn_1 (word1, low_part);
8746 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
8749 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
8751 /* Punt if we are here by mistake. */
8752 gcc_assert (reload_completed);
8754 if (GET_CODE (reg1) == SUBREG)
8755 reg1 = SUBREG_REG (reg1);
8756 if (GET_CODE (reg1) != REG)
8757 return 0;
8758 const int regno1 = REGNO (reg1);
8760 if (GET_CODE (reg2) == SUBREG)
8761 reg2 = SUBREG_REG (reg2);
8762 if (GET_CODE (reg2) != REG)
8763 return 0;
8764 const int regno2 = REGNO (reg2);
8766 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
8767 return 1;
8769 if (TARGET_VIS3)
8771 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
8772 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
8773 return 1;
8776 return 0;
8779 /* Split a REG <--> REG move into a pair of moves in MODE. */
8781 void
8782 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
8784 rtx dest1 = gen_highpart (mode, dest);
8785 rtx dest2 = gen_lowpart (mode, dest);
8786 rtx src1 = gen_highpart (mode, src);
8787 rtx src2 = gen_lowpart (mode, src);
8789 /* Now emit using the real source and destination we found, swapping
8790 the order if we detect overlap. */
8791 if (reg_overlap_mentioned_p (dest1, src2))
8793 emit_move_insn_1 (dest2, src2);
8794 emit_move_insn_1 (dest1, src1);
8796 else
8798 emit_move_insn_1 (dest1, src1);
8799 emit_move_insn_1 (dest2, src2);
8803 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
8804 This makes them candidates for using ldd and std insns.
8806 Note reg1 and reg2 *must* be hard registers. */
8809 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
8811 /* We might have been passed a SUBREG. */
8812 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
8813 return 0;
8815 if (REGNO (reg1) % 2 != 0)
8816 return 0;
8818 /* Integer ldd is deprecated in SPARC V9 */
8819 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
8820 return 0;
8822 return (REGNO (reg1) == REGNO (reg2) - 1);
8825 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
8826 an ldd or std insn.
8828 This can only happen when addr1 and addr2, the addresses in mem1
8829 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
8830 addr1 must also be aligned on a 64-bit boundary.
8832 Also iff dependent_reg_rtx is not null it should not be used to
8833 compute the address for mem1, i.e. we cannot optimize a sequence
8834 like:
8835 ld [%o0], %o0
8836 ld [%o0 + 4], %o1
8838 ldd [%o0], %o0
8839 nor:
8840 ld [%g3 + 4], %g3
8841 ld [%g3], %g2
8843 ldd [%g3], %g2
8845 But, note that the transformation from:
8846 ld [%g2 + 4], %g3
8847 ld [%g2], %g2
8849 ldd [%g2], %g2
8850 is perfectly fine. Thus, the peephole2 patterns always pass us
8851 the destination register of the first load, never the second one.
8853 For stores we don't have a similar problem, so dependent_reg_rtx is
8854 NULL_RTX. */
8857 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
8859 rtx addr1, addr2;
8860 unsigned int reg1;
8861 HOST_WIDE_INT offset1;
8863 /* The mems cannot be volatile. */
8864 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
8865 return 0;
8867 /* MEM1 should be aligned on a 64-bit boundary. */
8868 if (MEM_ALIGN (mem1) < 64)
8869 return 0;
8871 addr1 = XEXP (mem1, 0);
8872 addr2 = XEXP (mem2, 0);
8874 /* Extract a register number and offset (if used) from the first addr. */
8875 if (GET_CODE (addr1) == PLUS)
8877 /* If not a REG, return zero. */
8878 if (GET_CODE (XEXP (addr1, 0)) != REG)
8879 return 0;
8880 else
8882 reg1 = REGNO (XEXP (addr1, 0));
8883 /* The offset must be constant! */
8884 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
8885 return 0;
8886 offset1 = INTVAL (XEXP (addr1, 1));
8889 else if (GET_CODE (addr1) != REG)
8890 return 0;
8891 else
8893 reg1 = REGNO (addr1);
8894 /* This was a simple (mem (reg)) expression. Offset is 0. */
8895 offset1 = 0;
8898 /* Make sure the second address is a (mem (plus (reg) (const_int). */
8899 if (GET_CODE (addr2) != PLUS)
8900 return 0;
8902 if (GET_CODE (XEXP (addr2, 0)) != REG
8903 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
8904 return 0;
8906 if (reg1 != REGNO (XEXP (addr2, 0)))
8907 return 0;
8909 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
8910 return 0;
8912 /* The first offset must be evenly divisible by 8 to ensure the
8913 address is 64-bit aligned. */
8914 if (offset1 % 8 != 0)
8915 return 0;
8917 /* The offset for the second addr must be 4 more than the first addr. */
8918 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
8919 return 0;
8921 /* All the tests passed. addr1 and addr2 are valid for ldd and std
8922 instructions. */
8923 return 1;
8926 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
8929 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
8931 rtx x = widen_memory_access (mem1, mode, 0);
8932 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
8933 return x;
8936 /* Return 1 if reg is a pseudo, or is the first register in
8937 a hard register pair. This makes it suitable for use in
8938 ldd and std insns. */
8941 register_ok_for_ldd (rtx reg)
8943 /* We might have been passed a SUBREG. */
8944 if (!REG_P (reg))
8945 return 0;
8947 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
8948 return (REGNO (reg) % 2 == 0);
8950 return 1;
8953 /* Return 1 if OP, a MEM, has an address which is known to be
8954 aligned to an 8-byte boundary. */
8957 memory_ok_for_ldd (rtx op)
8959 /* In 64-bit mode, we assume that the address is word-aligned. */
8960 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
8961 return 0;
8963 if (! can_create_pseudo_p ()
8964 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
8965 return 0;
8967 return 1;
8970 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
8972 static bool
8973 sparc_print_operand_punct_valid_p (unsigned char code)
8975 if (code == '#'
8976 || code == '*'
8977 || code == '('
8978 || code == ')'
8979 || code == '_'
8980 || code == '&')
8981 return true;
8983 return false;
8986 /* Implement TARGET_PRINT_OPERAND.
8987 Print operand X (an rtx) in assembler syntax to file FILE.
8988 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
8989 For `%' followed by punctuation, CODE is the punctuation and X is null. */
8991 static void
8992 sparc_print_operand (FILE *file, rtx x, int code)
8994 const char *s;
8996 switch (code)
8998 case '#':
8999 /* Output an insn in a delay slot. */
9000 if (final_sequence)
9001 sparc_indent_opcode = 1;
9002 else
9003 fputs ("\n\t nop", file);
9004 return;
9005 case '*':
9006 /* Output an annul flag if there's nothing for the delay slot and we
9007 are optimizing. This is always used with '(' below.
9008 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9009 this is a dbx bug. So, we only do this when optimizing.
9010 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9011 Always emit a nop in case the next instruction is a branch. */
9012 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9013 fputs (",a", file);
9014 return;
9015 case '(':
9016 /* Output a 'nop' if there's nothing for the delay slot and we are
9017 not optimizing. This is always used with '*' above. */
9018 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9019 fputs ("\n\t nop", file);
9020 else if (final_sequence)
9021 sparc_indent_opcode = 1;
9022 return;
9023 case ')':
9024 /* Output the right displacement from the saved PC on function return.
9025 The caller may have placed an "unimp" insn immediately after the call
9026 so we have to account for it. This insn is used in the 32-bit ABI
9027 when calling a function that returns a non zero-sized structure. The
9028 64-bit ABI doesn't have it. Be careful to have this test be the same
9029 as that for the call. The exception is when sparc_std_struct_return
9030 is enabled, the psABI is followed exactly and the adjustment is made
9031 by the code in sparc_struct_value_rtx. The call emitted is the same
9032 when sparc_std_struct_return is enabled. */
9033 if (!TARGET_ARCH64
9034 && cfun->returns_struct
9035 && !sparc_std_struct_return
9036 && DECL_SIZE (DECL_RESULT (current_function_decl))
9037 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9038 == INTEGER_CST
9039 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9040 fputs ("12", file);
9041 else
9042 fputc ('8', file);
9043 return;
9044 case '_':
9045 /* Output the Embedded Medium/Anywhere code model base register. */
9046 fputs (EMBMEDANY_BASE_REG, file);
9047 return;
9048 case '&':
9049 /* Print some local dynamic TLS name. */
9050 if (const char *name = get_some_local_dynamic_name ())
9051 assemble_name (file, name);
9052 else
9053 output_operand_lossage ("'%%&' used without any "
9054 "local dynamic TLS references");
9055 return;
9057 case 'Y':
9058 /* Adjust the operand to take into account a RESTORE operation. */
9059 if (GET_CODE (x) == CONST_INT)
9060 break;
9061 else if (GET_CODE (x) != REG)
9062 output_operand_lossage ("invalid %%Y operand");
9063 else if (REGNO (x) < 8)
9064 fputs (reg_names[REGNO (x)], file);
9065 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9066 fputs (reg_names[REGNO (x)-16], file);
9067 else
9068 output_operand_lossage ("invalid %%Y operand");
9069 return;
9070 case 'L':
9071 /* Print out the low order register name of a register pair. */
9072 if (WORDS_BIG_ENDIAN)
9073 fputs (reg_names[REGNO (x)+1], file);
9074 else
9075 fputs (reg_names[REGNO (x)], file);
9076 return;
9077 case 'H':
9078 /* Print out the high order register name of a register pair. */
9079 if (WORDS_BIG_ENDIAN)
9080 fputs (reg_names[REGNO (x)], file);
9081 else
9082 fputs (reg_names[REGNO (x)+1], file);
9083 return;
9084 case 'R':
9085 /* Print out the second register name of a register pair or quad.
9086 I.e., R (%o0) => %o1. */
9087 fputs (reg_names[REGNO (x)+1], file);
9088 return;
9089 case 'S':
9090 /* Print out the third register name of a register quad.
9091 I.e., S (%o0) => %o2. */
9092 fputs (reg_names[REGNO (x)+2], file);
9093 return;
9094 case 'T':
9095 /* Print out the fourth register name of a register quad.
9096 I.e., T (%o0) => %o3. */
9097 fputs (reg_names[REGNO (x)+3], file);
9098 return;
9099 case 'x':
9100 /* Print a condition code register. */
9101 if (REGNO (x) == SPARC_ICC_REG)
9103 switch (GET_MODE (x))
9105 case E_CCmode:
9106 case E_CCNZmode:
9107 case E_CCCmode:
9108 case E_CCVmode:
9109 s = "%icc";
9110 break;
9111 case E_CCXmode:
9112 case E_CCXNZmode:
9113 case E_CCXCmode:
9114 case E_CCXVmode:
9115 s = "%xcc";
9116 break;
9117 default:
9118 gcc_unreachable ();
9120 fputs (s, file);
9122 else
9123 /* %fccN register */
9124 fputs (reg_names[REGNO (x)], file);
9125 return;
9126 case 'm':
9127 /* Print the operand's address only. */
9128 output_address (GET_MODE (x), XEXP (x, 0));
9129 return;
9130 case 'r':
9131 /* In this case we need a register. Use %g0 if the
9132 operand is const0_rtx. */
9133 if (x == const0_rtx
9134 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9136 fputs ("%g0", file);
9137 return;
9139 else
9140 break;
9142 case 'A':
9143 switch (GET_CODE (x))
9145 case IOR:
9146 s = "or";
9147 break;
9148 case AND:
9149 s = "and";
9150 break;
9151 case XOR:
9152 s = "xor";
9153 break;
9154 default:
9155 output_operand_lossage ("invalid %%A operand");
9156 s = "";
9157 break;
9159 fputs (s, file);
9160 return;
9162 case 'B':
9163 switch (GET_CODE (x))
9165 case IOR:
9166 s = "orn";
9167 break;
9168 case AND:
9169 s = "andn";
9170 break;
9171 case XOR:
9172 s = "xnor";
9173 break;
9174 default:
9175 output_operand_lossage ("invalid %%B operand");
9176 s = "";
9177 break;
9179 fputs (s, file);
9180 return;
9182 /* This is used by the conditional move instructions. */
9183 case 'C':
9185 machine_mode mode = GET_MODE (XEXP (x, 0));
9186 switch (GET_CODE (x))
9188 case NE:
9189 if (mode == CCVmode || mode == CCXVmode)
9190 s = "vs";
9191 else
9192 s = "ne";
9193 break;
9194 case EQ:
9195 if (mode == CCVmode || mode == CCXVmode)
9196 s = "vc";
9197 else
9198 s = "e";
9199 break;
9200 case GE:
9201 if (mode == CCNZmode || mode == CCXNZmode)
9202 s = "pos";
9203 else
9204 s = "ge";
9205 break;
9206 case GT:
9207 s = "g";
9208 break;
9209 case LE:
9210 s = "le";
9211 break;
9212 case LT:
9213 if (mode == CCNZmode || mode == CCXNZmode)
9214 s = "neg";
9215 else
9216 s = "l";
9217 break;
9218 case GEU:
9219 s = "geu";
9220 break;
9221 case GTU:
9222 s = "gu";
9223 break;
9224 case LEU:
9225 s = "leu";
9226 break;
9227 case LTU:
9228 s = "lu";
9229 break;
9230 case LTGT:
9231 s = "lg";
9232 break;
9233 case UNORDERED:
9234 s = "u";
9235 break;
9236 case ORDERED:
9237 s = "o";
9238 break;
9239 case UNLT:
9240 s = "ul";
9241 break;
9242 case UNLE:
9243 s = "ule";
9244 break;
9245 case UNGT:
9246 s = "ug";
9247 break;
9248 case UNGE:
9249 s = "uge"
9250 ; break;
9251 case UNEQ:
9252 s = "ue";
9253 break;
9254 default:
9255 output_operand_lossage ("invalid %%C operand");
9256 s = "";
9257 break;
9259 fputs (s, file);
9260 return;
9263 /* This are used by the movr instruction pattern. */
9264 case 'D':
9266 switch (GET_CODE (x))
9268 case NE:
9269 s = "ne";
9270 break;
9271 case EQ:
9272 s = "e";
9273 break;
9274 case GE:
9275 s = "gez";
9276 break;
9277 case LT:
9278 s = "lz";
9279 break;
9280 case LE:
9281 s = "lez";
9282 break;
9283 case GT:
9284 s = "gz";
9285 break;
9286 default:
9287 output_operand_lossage ("invalid %%D operand");
9288 s = "";
9289 break;
9291 fputs (s, file);
9292 return;
9295 case 'b':
9297 /* Print a sign-extended character. */
9298 int i = trunc_int_for_mode (INTVAL (x), QImode);
9299 fprintf (file, "%d", i);
9300 return;
9303 case 'f':
9304 /* Operand must be a MEM; write its address. */
9305 if (GET_CODE (x) != MEM)
9306 output_operand_lossage ("invalid %%f operand");
9307 output_address (GET_MODE (x), XEXP (x, 0));
9308 return;
9310 case 's':
9312 /* Print a sign-extended 32-bit value. */
9313 HOST_WIDE_INT i;
9314 if (GET_CODE(x) == CONST_INT)
9315 i = INTVAL (x);
9316 else
9318 output_operand_lossage ("invalid %%s operand");
9319 return;
9321 i = trunc_int_for_mode (i, SImode);
9322 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9323 return;
9326 case 0:
9327 /* Do nothing special. */
9328 break;
9330 default:
9331 /* Undocumented flag. */
9332 output_operand_lossage ("invalid operand output code");
9335 if (GET_CODE (x) == REG)
9336 fputs (reg_names[REGNO (x)], file);
9337 else if (GET_CODE (x) == MEM)
9339 fputc ('[', file);
9340 /* Poor Sun assembler doesn't understand absolute addressing. */
9341 if (CONSTANT_P (XEXP (x, 0)))
9342 fputs ("%g0+", file);
9343 output_address (GET_MODE (x), XEXP (x, 0));
9344 fputc (']', file);
9346 else if (GET_CODE (x) == HIGH)
9348 fputs ("%hi(", file);
9349 output_addr_const (file, XEXP (x, 0));
9350 fputc (')', file);
9352 else if (GET_CODE (x) == LO_SUM)
9354 sparc_print_operand (file, XEXP (x, 0), 0);
9355 if (TARGET_CM_MEDMID)
9356 fputs ("+%l44(", file);
9357 else
9358 fputs ("+%lo(", file);
9359 output_addr_const (file, XEXP (x, 1));
9360 fputc (')', file);
9362 else if (GET_CODE (x) == CONST_DOUBLE)
9363 output_operand_lossage ("floating-point constant not a valid immediate operand");
9364 else
9365 output_addr_const (file, x);
9368 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9370 static void
9371 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9373 register rtx base, index = 0;
9374 int offset = 0;
9375 register rtx addr = x;
9377 if (REG_P (addr))
9378 fputs (reg_names[REGNO (addr)], file);
9379 else if (GET_CODE (addr) == PLUS)
9381 if (CONST_INT_P (XEXP (addr, 0)))
9382 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9383 else if (CONST_INT_P (XEXP (addr, 1)))
9384 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9385 else
9386 base = XEXP (addr, 0), index = XEXP (addr, 1);
9387 if (GET_CODE (base) == LO_SUM)
9389 gcc_assert (USE_AS_OFFSETABLE_LO10
9390 && TARGET_ARCH64
9391 && ! TARGET_CM_MEDMID);
9392 output_operand (XEXP (base, 0), 0);
9393 fputs ("+%lo(", file);
9394 output_address (VOIDmode, XEXP (base, 1));
9395 fprintf (file, ")+%d", offset);
9397 else
9399 fputs (reg_names[REGNO (base)], file);
9400 if (index == 0)
9401 fprintf (file, "%+d", offset);
9402 else if (REG_P (index))
9403 fprintf (file, "+%s", reg_names[REGNO (index)]);
9404 else if (GET_CODE (index) == SYMBOL_REF
9405 || GET_CODE (index) == LABEL_REF
9406 || GET_CODE (index) == CONST)
9407 fputc ('+', file), output_addr_const (file, index);
9408 else gcc_unreachable ();
9411 else if (GET_CODE (addr) == MINUS
9412 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9414 output_addr_const (file, XEXP (addr, 0));
9415 fputs ("-(", file);
9416 output_addr_const (file, XEXP (addr, 1));
9417 fputs ("-.)", file);
9419 else if (GET_CODE (addr) == LO_SUM)
9421 output_operand (XEXP (addr, 0), 0);
9422 if (TARGET_CM_MEDMID)
9423 fputs ("+%l44(", file);
9424 else
9425 fputs ("+%lo(", file);
9426 output_address (VOIDmode, XEXP (addr, 1));
9427 fputc (')', file);
9429 else if (flag_pic
9430 && GET_CODE (addr) == CONST
9431 && GET_CODE (XEXP (addr, 0)) == MINUS
9432 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9433 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9434 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9436 addr = XEXP (addr, 0);
9437 output_addr_const (file, XEXP (addr, 0));
9438 /* Group the args of the second CONST in parenthesis. */
9439 fputs ("-(", file);
9440 /* Skip past the second CONST--it does nothing for us. */
9441 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9442 /* Close the parenthesis. */
9443 fputc (')', file);
9445 else
9447 output_addr_const (file, addr);
9451 /* Target hook for assembling integer objects. The sparc version has
9452 special handling for aligned DI-mode objects. */
9454 static bool
9455 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9457 /* ??? We only output .xword's for symbols and only then in environments
9458 where the assembler can handle them. */
9459 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9461 if (TARGET_V9)
9463 assemble_integer_with_op ("\t.xword\t", x);
9464 return true;
9466 else
9468 assemble_aligned_integer (4, const0_rtx);
9469 assemble_aligned_integer (4, x);
9470 return true;
9473 return default_assemble_integer (x, size, aligned_p);
9476 /* Return the value of a code used in the .proc pseudo-op that says
9477 what kind of result this function returns. For non-C types, we pick
9478 the closest C type. */
9480 #ifndef SHORT_TYPE_SIZE
9481 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9482 #endif
9484 #ifndef INT_TYPE_SIZE
9485 #define INT_TYPE_SIZE BITS_PER_WORD
9486 #endif
9488 #ifndef LONG_TYPE_SIZE
9489 #define LONG_TYPE_SIZE BITS_PER_WORD
9490 #endif
9492 #ifndef LONG_LONG_TYPE_SIZE
9493 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9494 #endif
9496 #ifndef FLOAT_TYPE_SIZE
9497 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9498 #endif
9500 #ifndef DOUBLE_TYPE_SIZE
9501 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9502 #endif
9504 #ifndef LONG_DOUBLE_TYPE_SIZE
9505 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9506 #endif
9508 unsigned long
9509 sparc_type_code (register tree type)
9511 register unsigned long qualifiers = 0;
9512 register unsigned shift;
9514 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9515 setting more, since some assemblers will give an error for this. Also,
9516 we must be careful to avoid shifts of 32 bits or more to avoid getting
9517 unpredictable results. */
9519 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9521 switch (TREE_CODE (type))
9523 case ERROR_MARK:
9524 return qualifiers;
9526 case ARRAY_TYPE:
9527 qualifiers |= (3 << shift);
9528 break;
9530 case FUNCTION_TYPE:
9531 case METHOD_TYPE:
9532 qualifiers |= (2 << shift);
9533 break;
9535 case POINTER_TYPE:
9536 case REFERENCE_TYPE:
9537 case OFFSET_TYPE:
9538 qualifiers |= (1 << shift);
9539 break;
9541 case RECORD_TYPE:
9542 return (qualifiers | 8);
9544 case UNION_TYPE:
9545 case QUAL_UNION_TYPE:
9546 return (qualifiers | 9);
9548 case ENUMERAL_TYPE:
9549 return (qualifiers | 10);
9551 case VOID_TYPE:
9552 return (qualifiers | 16);
9554 case INTEGER_TYPE:
9555 /* If this is a range type, consider it to be the underlying
9556 type. */
9557 if (TREE_TYPE (type) != 0)
9558 break;
9560 /* Carefully distinguish all the standard types of C,
9561 without messing up if the language is not C. We do this by
9562 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9563 look at both the names and the above fields, but that's redundant.
9564 Any type whose size is between two C types will be considered
9565 to be the wider of the two types. Also, we do not have a
9566 special code to use for "long long", so anything wider than
9567 long is treated the same. Note that we can't distinguish
9568 between "int" and "long" in this code if they are the same
9569 size, but that's fine, since neither can the assembler. */
9571 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9572 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9574 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9575 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9577 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9578 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9580 else
9581 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9583 case REAL_TYPE:
9584 /* If this is a range type, consider it to be the underlying
9585 type. */
9586 if (TREE_TYPE (type) != 0)
9587 break;
9589 /* Carefully distinguish all the standard types of C,
9590 without messing up if the language is not C. */
9592 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9593 return (qualifiers | 6);
9595 else
9596 return (qualifiers | 7);
9598 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9599 /* ??? We need to distinguish between double and float complex types,
9600 but I don't know how yet because I can't reach this code from
9601 existing front-ends. */
9602 return (qualifiers | 7); /* Who knows? */
9604 case VECTOR_TYPE:
9605 case BOOLEAN_TYPE: /* Boolean truth value type. */
9606 case LANG_TYPE:
9607 case NULLPTR_TYPE:
9608 return qualifiers;
9610 default:
9611 gcc_unreachable (); /* Not a type! */
9615 return qualifiers;
9618 /* Nested function support. */
9620 /* Emit RTL insns to initialize the variable parts of a trampoline.
9621 FNADDR is an RTX for the address of the function's pure code.
9622 CXT is an RTX for the static chain value for the function.
9624 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9625 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9626 (to store insns). This is a bit excessive. Perhaps a different
9627 mechanism would be better here.
9629 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9631 static void
9632 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9634 /* SPARC 32-bit trampoline:
9636 sethi %hi(fn), %g1
9637 sethi %hi(static), %g2
9638 jmp %g1+%lo(fn)
9639 or %g2, %lo(static), %g2
9641 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9642 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9645 emit_move_insn
9646 (adjust_address (m_tramp, SImode, 0),
9647 expand_binop (SImode, ior_optab,
9648 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9649 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9650 NULL_RTX, 1, OPTAB_DIRECT));
9652 emit_move_insn
9653 (adjust_address (m_tramp, SImode, 4),
9654 expand_binop (SImode, ior_optab,
9655 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9656 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9657 NULL_RTX, 1, OPTAB_DIRECT));
9659 emit_move_insn
9660 (adjust_address (m_tramp, SImode, 8),
9661 expand_binop (SImode, ior_optab,
9662 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9663 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9664 NULL_RTX, 1, OPTAB_DIRECT));
9666 emit_move_insn
9667 (adjust_address (m_tramp, SImode, 12),
9668 expand_binop (SImode, ior_optab,
9669 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9670 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9671 NULL_RTX, 1, OPTAB_DIRECT));
9673 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9674 aligned on a 16 byte boundary so one flush clears it all. */
9675 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9676 if (sparc_cpu != PROCESSOR_ULTRASPARC
9677 && sparc_cpu != PROCESSOR_ULTRASPARC3
9678 && sparc_cpu != PROCESSOR_NIAGARA
9679 && sparc_cpu != PROCESSOR_NIAGARA2
9680 && sparc_cpu != PROCESSOR_NIAGARA3
9681 && sparc_cpu != PROCESSOR_NIAGARA4
9682 && sparc_cpu != PROCESSOR_NIAGARA7
9683 && sparc_cpu != PROCESSOR_M8)
9684 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9686 /* Call __enable_execute_stack after writing onto the stack to make sure
9687 the stack address is accessible. */
9688 #ifdef HAVE_ENABLE_EXECUTE_STACK
9689 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9690 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9691 #endif
9695 /* The 64-bit version is simpler because it makes more sense to load the
9696 values as "immediate" data out of the trampoline. It's also easier since
9697 we can read the PC without clobbering a register. */
9699 static void
9700 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9702 /* SPARC 64-bit trampoline:
9704 rd %pc, %g1
9705 ldx [%g1+24], %g5
9706 jmp %g5
9707 ldx [%g1+16], %g5
9708 +16 bytes data
9711 emit_move_insn (adjust_address (m_tramp, SImode, 0),
9712 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
9713 emit_move_insn (adjust_address (m_tramp, SImode, 4),
9714 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
9715 emit_move_insn (adjust_address (m_tramp, SImode, 8),
9716 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
9717 emit_move_insn (adjust_address (m_tramp, SImode, 12),
9718 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
9719 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
9720 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
9721 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
9723 if (sparc_cpu != PROCESSOR_ULTRASPARC
9724 && sparc_cpu != PROCESSOR_ULTRASPARC3
9725 && sparc_cpu != PROCESSOR_NIAGARA
9726 && sparc_cpu != PROCESSOR_NIAGARA2
9727 && sparc_cpu != PROCESSOR_NIAGARA3
9728 && sparc_cpu != PROCESSOR_NIAGARA4
9729 && sparc_cpu != PROCESSOR_NIAGARA7
9730 && sparc_cpu != PROCESSOR_M8)
9731 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
9733 /* Call __enable_execute_stack after writing onto the stack to make sure
9734 the stack address is accessible. */
9735 #ifdef HAVE_ENABLE_EXECUTE_STACK
9736 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9737 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9738 #endif
9741 /* Worker for TARGET_TRAMPOLINE_INIT. */
9743 static void
9744 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
9746 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
9747 cxt = force_reg (Pmode, cxt);
9748 if (TARGET_ARCH64)
9749 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
9750 else
9751 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
9754 /* Adjust the cost of a scheduling dependency. Return the new cost of
9755 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
9757 static int
9758 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
9759 int cost)
9761 enum attr_type insn_type;
9763 if (recog_memoized (insn) < 0)
9764 return cost;
9766 insn_type = get_attr_type (insn);
9768 if (dep_type == 0)
9770 /* Data dependency; DEP_INSN writes a register that INSN reads some
9771 cycles later. */
9773 /* if a load, then the dependence must be on the memory address;
9774 add an extra "cycle". Note that the cost could be two cycles
9775 if the reg was written late in an instruction group; we ca not tell
9776 here. */
9777 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
9778 return cost + 3;
9780 /* Get the delay only if the address of the store is the dependence. */
9781 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
9783 rtx pat = PATTERN(insn);
9784 rtx dep_pat = PATTERN (dep_insn);
9786 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9787 return cost; /* This should not happen! */
9789 /* The dependency between the two instructions was on the data that
9790 is being stored. Assume that this implies that the address of the
9791 store is not dependent. */
9792 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9793 return cost;
9795 return cost + 3; /* An approximation. */
9798 /* A shift instruction cannot receive its data from an instruction
9799 in the same cycle; add a one cycle penalty. */
9800 if (insn_type == TYPE_SHIFT)
9801 return cost + 3; /* Split before cascade into shift. */
9803 else
9805 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
9806 INSN writes some cycles later. */
9808 /* These are only significant for the fpu unit; writing a fp reg before
9809 the fpu has finished with it stalls the processor. */
9811 /* Reusing an integer register causes no problems. */
9812 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9813 return 0;
9816 return cost;
9819 static int
9820 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
9821 int cost)
9823 enum attr_type insn_type, dep_type;
9824 rtx pat = PATTERN(insn);
9825 rtx dep_pat = PATTERN (dep_insn);
9827 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
9828 return cost;
9830 insn_type = get_attr_type (insn);
9831 dep_type = get_attr_type (dep_insn);
9833 switch (dtype)
9835 case 0:
9836 /* Data dependency; DEP_INSN writes a register that INSN reads some
9837 cycles later. */
9839 switch (insn_type)
9841 case TYPE_STORE:
9842 case TYPE_FPSTORE:
9843 /* Get the delay iff the address of the store is the dependence. */
9844 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
9845 return cost;
9847 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
9848 return cost;
9849 return cost + 3;
9851 case TYPE_LOAD:
9852 case TYPE_SLOAD:
9853 case TYPE_FPLOAD:
9854 /* If a load, then the dependence must be on the memory address. If
9855 the addresses aren't equal, then it might be a false dependency */
9856 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
9858 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
9859 || GET_CODE (SET_DEST (dep_pat)) != MEM
9860 || GET_CODE (SET_SRC (pat)) != MEM
9861 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
9862 XEXP (SET_SRC (pat), 0)))
9863 return cost + 2;
9865 return cost + 8;
9867 break;
9869 case TYPE_BRANCH:
9870 /* Compare to branch latency is 0. There is no benefit from
9871 separating compare and branch. */
9872 if (dep_type == TYPE_COMPARE)
9873 return 0;
9874 /* Floating point compare to branch latency is less than
9875 compare to conditional move. */
9876 if (dep_type == TYPE_FPCMP)
9877 return cost - 1;
9878 break;
9879 default:
9880 break;
9882 break;
9884 case REG_DEP_ANTI:
9885 /* Anti-dependencies only penalize the fpu unit. */
9886 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
9887 return 0;
9888 break;
9890 default:
9891 break;
9894 return cost;
9897 static int
9898 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
9899 unsigned int)
9901 switch (sparc_cpu)
9903 case PROCESSOR_SUPERSPARC:
9904 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
9905 break;
9906 case PROCESSOR_HYPERSPARC:
9907 case PROCESSOR_SPARCLITE86X:
9908 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
9909 break;
9910 default:
9911 break;
9913 return cost;
9916 static void
9917 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
9918 int sched_verbose ATTRIBUTE_UNUSED,
9919 int max_ready ATTRIBUTE_UNUSED)
9922 static int
9923 sparc_use_sched_lookahead (void)
9925 if (sparc_cpu == PROCESSOR_NIAGARA
9926 || sparc_cpu == PROCESSOR_NIAGARA2
9927 || sparc_cpu == PROCESSOR_NIAGARA3)
9928 return 0;
9929 if (sparc_cpu == PROCESSOR_NIAGARA4
9930 || sparc_cpu == PROCESSOR_NIAGARA7
9931 || sparc_cpu == PROCESSOR_M8)
9932 return 2;
9933 if (sparc_cpu == PROCESSOR_ULTRASPARC
9934 || sparc_cpu == PROCESSOR_ULTRASPARC3)
9935 return 4;
9936 if ((1 << sparc_cpu) &
9937 ((1 << PROCESSOR_SUPERSPARC) | (1 << PROCESSOR_HYPERSPARC) |
9938 (1 << PROCESSOR_SPARCLITE86X)))
9939 return 3;
9940 return 0;
9943 static int
9944 sparc_issue_rate (void)
9946 switch (sparc_cpu)
9948 case PROCESSOR_NIAGARA:
9949 case PROCESSOR_NIAGARA2:
9950 case PROCESSOR_NIAGARA3:
9951 default:
9952 return 1;
9953 case PROCESSOR_NIAGARA4:
9954 case PROCESSOR_NIAGARA7:
9955 case PROCESSOR_V9:
9956 /* Assume V9 processors are capable of at least dual-issue. */
9957 return 2;
9958 case PROCESSOR_SUPERSPARC:
9959 return 3;
9960 case PROCESSOR_HYPERSPARC:
9961 case PROCESSOR_SPARCLITE86X:
9962 return 2;
9963 case PROCESSOR_ULTRASPARC:
9964 case PROCESSOR_ULTRASPARC3:
9965 case PROCESSOR_M8:
9966 return 4;
9970 static int
9971 set_extends (rtx_insn *insn)
9973 register rtx pat = PATTERN (insn);
9975 switch (GET_CODE (SET_SRC (pat)))
9977 /* Load and some shift instructions zero extend. */
9978 case MEM:
9979 case ZERO_EXTEND:
9980 /* sethi clears the high bits */
9981 case HIGH:
9982 /* LO_SUM is used with sethi. sethi cleared the high
9983 bits and the values used with lo_sum are positive */
9984 case LO_SUM:
9985 /* Store flag stores 0 or 1 */
9986 case LT: case LTU:
9987 case GT: case GTU:
9988 case LE: case LEU:
9989 case GE: case GEU:
9990 case EQ:
9991 case NE:
9992 return 1;
9993 case AND:
9995 rtx op0 = XEXP (SET_SRC (pat), 0);
9996 rtx op1 = XEXP (SET_SRC (pat), 1);
9997 if (GET_CODE (op1) == CONST_INT)
9998 return INTVAL (op1) >= 0;
9999 if (GET_CODE (op0) != REG)
10000 return 0;
10001 if (sparc_check_64 (op0, insn) == 1)
10002 return 1;
10003 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10005 case IOR:
10006 case XOR:
10008 rtx op0 = XEXP (SET_SRC (pat), 0);
10009 rtx op1 = XEXP (SET_SRC (pat), 1);
10010 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10011 return 0;
10012 if (GET_CODE (op1) == CONST_INT)
10013 return INTVAL (op1) >= 0;
10014 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10016 case LSHIFTRT:
10017 return GET_MODE (SET_SRC (pat)) == SImode;
10018 /* Positive integers leave the high bits zero. */
10019 case CONST_INT:
10020 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10021 case ASHIFTRT:
10022 case SIGN_EXTEND:
10023 return - (GET_MODE (SET_SRC (pat)) == SImode);
10024 case REG:
10025 return sparc_check_64 (SET_SRC (pat), insn);
10026 default:
10027 return 0;
10031 /* We _ought_ to have only one kind per function, but... */
10032 static GTY(()) rtx sparc_addr_diff_list;
10033 static GTY(()) rtx sparc_addr_list;
10035 void
10036 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10038 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10039 if (diff)
10040 sparc_addr_diff_list
10041 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10042 else
10043 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10046 static void
10047 sparc_output_addr_vec (rtx vec)
10049 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10050 int idx, vlen = XVECLEN (body, 0);
10052 #ifdef ASM_OUTPUT_ADDR_VEC_START
10053 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10054 #endif
10056 #ifdef ASM_OUTPUT_CASE_LABEL
10057 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10058 NEXT_INSN (lab));
10059 #else
10060 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10061 #endif
10063 for (idx = 0; idx < vlen; idx++)
10065 ASM_OUTPUT_ADDR_VEC_ELT
10066 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10069 #ifdef ASM_OUTPUT_ADDR_VEC_END
10070 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10071 #endif
10074 static void
10075 sparc_output_addr_diff_vec (rtx vec)
10077 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10078 rtx base = XEXP (XEXP (body, 0), 0);
10079 int idx, vlen = XVECLEN (body, 1);
10081 #ifdef ASM_OUTPUT_ADDR_VEC_START
10082 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10083 #endif
10085 #ifdef ASM_OUTPUT_CASE_LABEL
10086 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10087 NEXT_INSN (lab));
10088 #else
10089 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10090 #endif
10092 for (idx = 0; idx < vlen; idx++)
10094 ASM_OUTPUT_ADDR_DIFF_ELT
10095 (asm_out_file,
10096 body,
10097 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10098 CODE_LABEL_NUMBER (base));
10101 #ifdef ASM_OUTPUT_ADDR_VEC_END
10102 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10103 #endif
10106 static void
10107 sparc_output_deferred_case_vectors (void)
10109 rtx t;
10110 int align;
10112 if (sparc_addr_list == NULL_RTX
10113 && sparc_addr_diff_list == NULL_RTX)
10114 return;
10116 /* Align to cache line in the function's code section. */
10117 switch_to_section (current_function_section ());
10119 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10120 if (align > 0)
10121 ASM_OUTPUT_ALIGN (asm_out_file, align);
10123 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10124 sparc_output_addr_vec (XEXP (t, 0));
10125 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10126 sparc_output_addr_diff_vec (XEXP (t, 0));
10128 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10131 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10132 unknown. Return 1 if the high bits are zero, -1 if the register is
10133 sign extended. */
10135 sparc_check_64 (rtx x, rtx_insn *insn)
10137 /* If a register is set only once it is safe to ignore insns this
10138 code does not know how to handle. The loop will either recognize
10139 the single set and return the correct value or fail to recognize
10140 it and return 0. */
10141 int set_once = 0;
10142 rtx y = x;
10144 gcc_assert (GET_CODE (x) == REG);
10146 if (GET_MODE (x) == DImode)
10147 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10149 if (flag_expensive_optimizations
10150 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10151 set_once = 1;
10153 if (insn == 0)
10155 if (set_once)
10156 insn = get_last_insn_anywhere ();
10157 else
10158 return 0;
10161 while ((insn = PREV_INSN (insn)))
10163 switch (GET_CODE (insn))
10165 case JUMP_INSN:
10166 case NOTE:
10167 break;
10168 case CODE_LABEL:
10169 case CALL_INSN:
10170 default:
10171 if (! set_once)
10172 return 0;
10173 break;
10174 case INSN:
10176 rtx pat = PATTERN (insn);
10177 if (GET_CODE (pat) != SET)
10178 return 0;
10179 if (rtx_equal_p (x, SET_DEST (pat)))
10180 return set_extends (insn);
10181 if (y && rtx_equal_p (y, SET_DEST (pat)))
10182 return set_extends (insn);
10183 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10184 return 0;
10188 return 0;
10191 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10192 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10194 const char *
10195 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10197 static char asm_code[60];
10199 /* The scratch register is only required when the destination
10200 register is not a 64-bit global or out register. */
10201 if (which_alternative != 2)
10202 operands[3] = operands[0];
10204 /* We can only shift by constants <= 63. */
10205 if (GET_CODE (operands[2]) == CONST_INT)
10206 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10208 if (GET_CODE (operands[1]) == CONST_INT)
10210 output_asm_insn ("mov\t%1, %3", operands);
10212 else
10214 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10215 if (sparc_check_64 (operands[1], insn) <= 0)
10216 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10217 output_asm_insn ("or\t%L1, %3, %3", operands);
10220 strcpy (asm_code, opcode);
10222 if (which_alternative != 2)
10223 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10224 else
10225 return
10226 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
10229 /* Output rtl to increment the profiler label LABELNO
10230 for profiling a function entry. */
10232 void
10233 sparc_profile_hook (int labelno)
10235 char buf[32];
10236 rtx lab, fun;
10238 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10239 if (NO_PROFILE_COUNTERS)
10241 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10243 else
10245 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10246 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10247 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10251 #ifdef TARGET_SOLARIS
10252 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10254 static void
10255 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10256 tree decl ATTRIBUTE_UNUSED)
10258 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10260 solaris_elf_asm_comdat_section (name, flags, decl);
10261 return;
10264 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10266 if (!(flags & SECTION_DEBUG))
10267 fputs (",#alloc", asm_out_file);
10268 if (flags & SECTION_WRITE)
10269 fputs (",#write", asm_out_file);
10270 if (flags & SECTION_TLS)
10271 fputs (",#tls", asm_out_file);
10272 if (flags & SECTION_CODE)
10273 fputs (",#execinstr", asm_out_file);
10275 if (flags & SECTION_NOTYPE)
10277 else if (flags & SECTION_BSS)
10278 fputs (",#nobits", asm_out_file);
10279 else
10280 fputs (",#progbits", asm_out_file);
10282 fputc ('\n', asm_out_file);
10284 #endif /* TARGET_SOLARIS */
10286 /* We do not allow indirect calls to be optimized into sibling calls.
10288 We cannot use sibling calls when delayed branches are disabled
10289 because they will likely require the call delay slot to be filled.
10291 Also, on SPARC 32-bit we cannot emit a sibling call when the
10292 current function returns a structure. This is because the "unimp
10293 after call" convention would cause the callee to return to the
10294 wrong place. The generic code already disallows cases where the
10295 function being called returns a structure.
10297 It may seem strange how this last case could occur. Usually there
10298 is code after the call which jumps to epilogue code which dumps the
10299 return value into the struct return area. That ought to invalidate
10300 the sibling call right? Well, in the C++ case we can end up passing
10301 the pointer to the struct return area to a constructor (which returns
10302 void) and then nothing else happens. Such a sibling call would look
10303 valid without the added check here.
10305 VxWorks PIC PLT entries require the global pointer to be initialized
10306 on entry. We therefore can't emit sibling calls to them. */
10307 static bool
10308 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10310 return (decl
10311 && flag_delayed_branch
10312 && (TARGET_ARCH64 || ! cfun->returns_struct)
10313 && !(TARGET_VXWORKS_RTP
10314 && flag_pic
10315 && !targetm.binds_local_p (decl)));
10318 /* libfunc renaming. */
10320 static void
10321 sparc_init_libfuncs (void)
10323 if (TARGET_ARCH32)
10325 /* Use the subroutines that Sun's library provides for integer
10326 multiply and divide. The `*' prevents an underscore from
10327 being prepended by the compiler. .umul is a little faster
10328 than .mul. */
10329 set_optab_libfunc (smul_optab, SImode, "*.umul");
10330 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10331 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10332 set_optab_libfunc (smod_optab, SImode, "*.rem");
10333 set_optab_libfunc (umod_optab, SImode, "*.urem");
10335 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10336 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10337 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10338 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10339 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10340 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10342 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10343 is because with soft-float, the SFmode and DFmode sqrt
10344 instructions will be absent, and the compiler will notice and
10345 try to use the TFmode sqrt instruction for calls to the
10346 builtin function sqrt, but this fails. */
10347 if (TARGET_FPU)
10348 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10350 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10351 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10352 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10353 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10354 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10355 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10357 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10358 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10359 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10360 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10362 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10363 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10364 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10365 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10367 if (DITF_CONVERSION_LIBFUNCS)
10369 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10370 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10371 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10372 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10375 if (SUN_CONVERSION_LIBFUNCS)
10377 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10378 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10379 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10380 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10383 if (TARGET_ARCH64)
10385 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10386 do not exist in the library. Make sure the compiler does not
10387 emit calls to them by accident. (It should always use the
10388 hardware instructions.) */
10389 set_optab_libfunc (smul_optab, SImode, 0);
10390 set_optab_libfunc (sdiv_optab, SImode, 0);
10391 set_optab_libfunc (udiv_optab, SImode, 0);
10392 set_optab_libfunc (smod_optab, SImode, 0);
10393 set_optab_libfunc (umod_optab, SImode, 0);
10395 if (SUN_INTEGER_MULTIPLY_64)
10397 set_optab_libfunc (smul_optab, DImode, "__mul64");
10398 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10399 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10400 set_optab_libfunc (smod_optab, DImode, "__rem64");
10401 set_optab_libfunc (umod_optab, DImode, "__urem64");
10404 if (SUN_CONVERSION_LIBFUNCS)
10406 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10407 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10408 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10409 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
10414 /* SPARC builtins. */
10415 enum sparc_builtins
10417 /* FPU builtins. */
10418 SPARC_BUILTIN_LDFSR,
10419 SPARC_BUILTIN_STFSR,
10421 /* VIS 1.0 builtins. */
10422 SPARC_BUILTIN_FPACK16,
10423 SPARC_BUILTIN_FPACK32,
10424 SPARC_BUILTIN_FPACKFIX,
10425 SPARC_BUILTIN_FEXPAND,
10426 SPARC_BUILTIN_FPMERGE,
10427 SPARC_BUILTIN_FMUL8X16,
10428 SPARC_BUILTIN_FMUL8X16AU,
10429 SPARC_BUILTIN_FMUL8X16AL,
10430 SPARC_BUILTIN_FMUL8SUX16,
10431 SPARC_BUILTIN_FMUL8ULX16,
10432 SPARC_BUILTIN_FMULD8SUX16,
10433 SPARC_BUILTIN_FMULD8ULX16,
10434 SPARC_BUILTIN_FALIGNDATAV4HI,
10435 SPARC_BUILTIN_FALIGNDATAV8QI,
10436 SPARC_BUILTIN_FALIGNDATAV2SI,
10437 SPARC_BUILTIN_FALIGNDATADI,
10438 SPARC_BUILTIN_WRGSR,
10439 SPARC_BUILTIN_RDGSR,
10440 SPARC_BUILTIN_ALIGNADDR,
10441 SPARC_BUILTIN_ALIGNADDRL,
10442 SPARC_BUILTIN_PDIST,
10443 SPARC_BUILTIN_EDGE8,
10444 SPARC_BUILTIN_EDGE8L,
10445 SPARC_BUILTIN_EDGE16,
10446 SPARC_BUILTIN_EDGE16L,
10447 SPARC_BUILTIN_EDGE32,
10448 SPARC_BUILTIN_EDGE32L,
10449 SPARC_BUILTIN_FCMPLE16,
10450 SPARC_BUILTIN_FCMPLE32,
10451 SPARC_BUILTIN_FCMPNE16,
10452 SPARC_BUILTIN_FCMPNE32,
10453 SPARC_BUILTIN_FCMPGT16,
10454 SPARC_BUILTIN_FCMPGT32,
10455 SPARC_BUILTIN_FCMPEQ16,
10456 SPARC_BUILTIN_FCMPEQ32,
10457 SPARC_BUILTIN_FPADD16,
10458 SPARC_BUILTIN_FPADD16S,
10459 SPARC_BUILTIN_FPADD32,
10460 SPARC_BUILTIN_FPADD32S,
10461 SPARC_BUILTIN_FPSUB16,
10462 SPARC_BUILTIN_FPSUB16S,
10463 SPARC_BUILTIN_FPSUB32,
10464 SPARC_BUILTIN_FPSUB32S,
10465 SPARC_BUILTIN_ARRAY8,
10466 SPARC_BUILTIN_ARRAY16,
10467 SPARC_BUILTIN_ARRAY32,
10469 /* VIS 2.0 builtins. */
10470 SPARC_BUILTIN_EDGE8N,
10471 SPARC_BUILTIN_EDGE8LN,
10472 SPARC_BUILTIN_EDGE16N,
10473 SPARC_BUILTIN_EDGE16LN,
10474 SPARC_BUILTIN_EDGE32N,
10475 SPARC_BUILTIN_EDGE32LN,
10476 SPARC_BUILTIN_BMASK,
10477 SPARC_BUILTIN_BSHUFFLEV4HI,
10478 SPARC_BUILTIN_BSHUFFLEV8QI,
10479 SPARC_BUILTIN_BSHUFFLEV2SI,
10480 SPARC_BUILTIN_BSHUFFLEDI,
10482 /* VIS 3.0 builtins. */
10483 SPARC_BUILTIN_CMASK8,
10484 SPARC_BUILTIN_CMASK16,
10485 SPARC_BUILTIN_CMASK32,
10486 SPARC_BUILTIN_FCHKSM16,
10487 SPARC_BUILTIN_FSLL16,
10488 SPARC_BUILTIN_FSLAS16,
10489 SPARC_BUILTIN_FSRL16,
10490 SPARC_BUILTIN_FSRA16,
10491 SPARC_BUILTIN_FSLL32,
10492 SPARC_BUILTIN_FSLAS32,
10493 SPARC_BUILTIN_FSRL32,
10494 SPARC_BUILTIN_FSRA32,
10495 SPARC_BUILTIN_PDISTN,
10496 SPARC_BUILTIN_FMEAN16,
10497 SPARC_BUILTIN_FPADD64,
10498 SPARC_BUILTIN_FPSUB64,
10499 SPARC_BUILTIN_FPADDS16,
10500 SPARC_BUILTIN_FPADDS16S,
10501 SPARC_BUILTIN_FPSUBS16,
10502 SPARC_BUILTIN_FPSUBS16S,
10503 SPARC_BUILTIN_FPADDS32,
10504 SPARC_BUILTIN_FPADDS32S,
10505 SPARC_BUILTIN_FPSUBS32,
10506 SPARC_BUILTIN_FPSUBS32S,
10507 SPARC_BUILTIN_FUCMPLE8,
10508 SPARC_BUILTIN_FUCMPNE8,
10509 SPARC_BUILTIN_FUCMPGT8,
10510 SPARC_BUILTIN_FUCMPEQ8,
10511 SPARC_BUILTIN_FHADDS,
10512 SPARC_BUILTIN_FHADDD,
10513 SPARC_BUILTIN_FHSUBS,
10514 SPARC_BUILTIN_FHSUBD,
10515 SPARC_BUILTIN_FNHADDS,
10516 SPARC_BUILTIN_FNHADDD,
10517 SPARC_BUILTIN_UMULXHI,
10518 SPARC_BUILTIN_XMULX,
10519 SPARC_BUILTIN_XMULXHI,
10521 /* VIS 4.0 builtins. */
10522 SPARC_BUILTIN_FPADD8,
10523 SPARC_BUILTIN_FPADDS8,
10524 SPARC_BUILTIN_FPADDUS8,
10525 SPARC_BUILTIN_FPADDUS16,
10526 SPARC_BUILTIN_FPCMPLE8,
10527 SPARC_BUILTIN_FPCMPGT8,
10528 SPARC_BUILTIN_FPCMPULE16,
10529 SPARC_BUILTIN_FPCMPUGT16,
10530 SPARC_BUILTIN_FPCMPULE32,
10531 SPARC_BUILTIN_FPCMPUGT32,
10532 SPARC_BUILTIN_FPMAX8,
10533 SPARC_BUILTIN_FPMAX16,
10534 SPARC_BUILTIN_FPMAX32,
10535 SPARC_BUILTIN_FPMAXU8,
10536 SPARC_BUILTIN_FPMAXU16,
10537 SPARC_BUILTIN_FPMAXU32,
10538 SPARC_BUILTIN_FPMIN8,
10539 SPARC_BUILTIN_FPMIN16,
10540 SPARC_BUILTIN_FPMIN32,
10541 SPARC_BUILTIN_FPMINU8,
10542 SPARC_BUILTIN_FPMINU16,
10543 SPARC_BUILTIN_FPMINU32,
10544 SPARC_BUILTIN_FPSUB8,
10545 SPARC_BUILTIN_FPSUBS8,
10546 SPARC_BUILTIN_FPSUBUS8,
10547 SPARC_BUILTIN_FPSUBUS16,
10549 /* VIS 4.0B builtins. */
10551 /* Note that all the DICTUNPACK* entries should be kept
10552 contiguous. */
10553 SPARC_BUILTIN_FIRST_DICTUNPACK,
10554 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10555 SPARC_BUILTIN_DICTUNPACK16,
10556 SPARC_BUILTIN_DICTUNPACK32,
10557 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10559 /* Note that all the FPCMP*SHL entries should be kept
10560 contiguous. */
10561 SPARC_BUILTIN_FIRST_FPCMPSHL,
10562 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10563 SPARC_BUILTIN_FPCMPGT8SHL,
10564 SPARC_BUILTIN_FPCMPEQ8SHL,
10565 SPARC_BUILTIN_FPCMPNE8SHL,
10566 SPARC_BUILTIN_FPCMPLE16SHL,
10567 SPARC_BUILTIN_FPCMPGT16SHL,
10568 SPARC_BUILTIN_FPCMPEQ16SHL,
10569 SPARC_BUILTIN_FPCMPNE16SHL,
10570 SPARC_BUILTIN_FPCMPLE32SHL,
10571 SPARC_BUILTIN_FPCMPGT32SHL,
10572 SPARC_BUILTIN_FPCMPEQ32SHL,
10573 SPARC_BUILTIN_FPCMPNE32SHL,
10574 SPARC_BUILTIN_FPCMPULE8SHL,
10575 SPARC_BUILTIN_FPCMPUGT8SHL,
10576 SPARC_BUILTIN_FPCMPULE16SHL,
10577 SPARC_BUILTIN_FPCMPUGT16SHL,
10578 SPARC_BUILTIN_FPCMPULE32SHL,
10579 SPARC_BUILTIN_FPCMPUGT32SHL,
10580 SPARC_BUILTIN_FPCMPDE8SHL,
10581 SPARC_BUILTIN_FPCMPDE16SHL,
10582 SPARC_BUILTIN_FPCMPDE32SHL,
10583 SPARC_BUILTIN_FPCMPUR8SHL,
10584 SPARC_BUILTIN_FPCMPUR16SHL,
10585 SPARC_BUILTIN_FPCMPUR32SHL,
10586 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10588 SPARC_BUILTIN_MAX
10591 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10592 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10594 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10595 The instruction should require a constant operand of some sort. The
10596 function prints an error if OPVAL is not valid. */
10598 static int
10599 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10601 if (GET_CODE (opval) != CONST_INT)
10603 error ("%qs expects a constant argument", insn_data[icode].name);
10604 return false;
10607 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10609 error ("constant argument out of range for %qs", insn_data[icode].name);
10610 return false;
10612 return true;
10615 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10616 function decl or NULL_TREE if the builtin was not added. */
10618 static tree
10619 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10620 tree type)
10622 tree t
10623 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10625 if (t)
10627 sparc_builtins[code] = t;
10628 sparc_builtins_icode[code] = icode;
10631 return t;
10634 /* Likewise, but also marks the function as "const". */
10636 static tree
10637 def_builtin_const (const char *name, enum insn_code icode,
10638 enum sparc_builtins code, tree type)
10640 tree t = def_builtin (name, icode, code, type);
10642 if (t)
10643 TREE_READONLY (t) = 1;
10645 return t;
10648 /* Implement the TARGET_INIT_BUILTINS target hook.
10649 Create builtin functions for special SPARC instructions. */
10651 static void
10652 sparc_init_builtins (void)
10654 if (TARGET_FPU)
10655 sparc_fpu_init_builtins ();
10657 if (TARGET_VIS)
10658 sparc_vis_init_builtins ();
10661 /* Create builtin functions for FPU instructions. */
10663 static void
10664 sparc_fpu_init_builtins (void)
10666 tree ftype
10667 = build_function_type_list (void_type_node,
10668 build_pointer_type (unsigned_type_node), 0);
10669 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
10670 SPARC_BUILTIN_LDFSR, ftype);
10671 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
10672 SPARC_BUILTIN_STFSR, ftype);
10675 /* Create builtin functions for VIS instructions. */
10677 static void
10678 sparc_vis_init_builtins (void)
10680 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
10681 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
10682 tree v4hi = build_vector_type (intHI_type_node, 4);
10683 tree v2hi = build_vector_type (intHI_type_node, 2);
10684 tree v2si = build_vector_type (intSI_type_node, 2);
10685 tree v1si = build_vector_type (intSI_type_node, 1);
10687 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
10688 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
10689 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
10690 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
10691 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
10692 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
10693 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
10694 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
10695 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
10696 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
10697 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
10698 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
10699 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
10700 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
10701 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
10702 v8qi, v8qi,
10703 intDI_type_node, 0);
10704 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
10705 v8qi, v8qi, 0);
10706 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
10707 v8qi, v8qi, 0);
10708 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
10709 intSI_type_node, 0);
10710 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
10711 intSI_type_node, 0);
10712 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10713 intDI_type_node, 0);
10714 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
10715 intDI_type_node,
10716 intDI_type_node, 0);
10717 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
10718 intSI_type_node,
10719 intSI_type_node, 0);
10720 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
10721 ptr_type_node,
10722 intSI_type_node, 0);
10723 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
10724 ptr_type_node,
10725 intDI_type_node, 0);
10726 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
10727 ptr_type_node,
10728 ptr_type_node, 0);
10729 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
10730 ptr_type_node,
10731 ptr_type_node, 0);
10732 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
10733 v4hi, v4hi, 0);
10734 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
10735 v2si, v2si, 0);
10736 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
10737 v4hi, v4hi, 0);
10738 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
10739 v2si, v2si, 0);
10740 tree void_ftype_di = build_function_type_list (void_type_node,
10741 intDI_type_node, 0);
10742 tree di_ftype_void = build_function_type_list (intDI_type_node,
10743 void_type_node, 0);
10744 tree void_ftype_si = build_function_type_list (void_type_node,
10745 intSI_type_node, 0);
10746 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
10747 float_type_node,
10748 float_type_node, 0);
10749 tree df_ftype_df_df = build_function_type_list (double_type_node,
10750 double_type_node,
10751 double_type_node, 0);
10753 /* Packing and expanding vectors. */
10754 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
10755 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
10756 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
10757 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
10758 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
10759 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
10760 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
10761 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
10762 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
10763 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
10765 /* Multiplications. */
10766 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
10767 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
10768 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
10769 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
10770 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
10771 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
10772 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
10773 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
10774 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
10775 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
10776 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
10777 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
10778 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
10779 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
10781 /* Data aligning. */
10782 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
10783 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
10784 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
10785 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
10786 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
10787 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
10788 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
10789 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
10791 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
10792 SPARC_BUILTIN_WRGSR, void_ftype_di);
10793 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
10794 SPARC_BUILTIN_RDGSR, di_ftype_void);
10796 if (TARGET_ARCH64)
10798 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
10799 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
10800 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
10801 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
10803 else
10805 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
10806 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
10807 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
10808 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
10811 /* Pixel distance. */
10812 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
10813 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
10815 /* Edge handling. */
10816 if (TARGET_ARCH64)
10818 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
10819 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
10820 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
10821 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
10822 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
10823 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
10824 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
10825 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
10826 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
10827 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
10828 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
10829 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
10831 else
10833 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
10834 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
10835 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
10836 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
10837 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
10838 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
10839 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
10840 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
10841 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
10842 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
10843 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
10844 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
10847 /* Pixel compare. */
10848 if (TARGET_ARCH64)
10850 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
10851 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
10852 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
10853 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
10854 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
10855 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
10856 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
10857 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
10858 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
10859 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
10860 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
10861 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
10862 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
10863 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
10864 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
10865 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
10867 else
10869 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
10870 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
10871 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
10872 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
10873 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
10874 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
10875 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
10876 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
10877 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
10878 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
10879 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
10880 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
10881 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
10882 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
10883 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
10884 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
10887 /* Addition and subtraction. */
10888 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
10889 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
10890 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
10891 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
10892 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
10893 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
10894 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
10895 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
10896 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
10897 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
10898 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
10899 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
10900 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
10901 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
10902 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
10903 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
10905 /* Three-dimensional array addressing. */
10906 if (TARGET_ARCH64)
10908 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
10909 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
10910 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
10911 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
10912 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
10913 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
10915 else
10917 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
10918 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
10919 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
10920 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
10921 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
10922 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
10925 if (TARGET_VIS2)
10927 /* Edge handling. */
10928 if (TARGET_ARCH64)
10930 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
10931 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
10932 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
10933 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
10934 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
10935 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
10936 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
10937 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
10938 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
10939 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
10940 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
10941 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
10943 else
10945 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
10946 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
10947 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
10948 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
10949 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
10950 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
10951 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
10952 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
10953 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
10954 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
10955 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
10956 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
10959 /* Byte mask and shuffle. */
10960 if (TARGET_ARCH64)
10961 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
10962 SPARC_BUILTIN_BMASK, di_ftype_di_di);
10963 else
10964 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
10965 SPARC_BUILTIN_BMASK, si_ftype_si_si);
10966 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
10967 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
10968 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
10969 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
10970 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
10971 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
10972 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
10973 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
10976 if (TARGET_VIS3)
10978 if (TARGET_ARCH64)
10980 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
10981 SPARC_BUILTIN_CMASK8, void_ftype_di);
10982 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
10983 SPARC_BUILTIN_CMASK16, void_ftype_di);
10984 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
10985 SPARC_BUILTIN_CMASK32, void_ftype_di);
10987 else
10989 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
10990 SPARC_BUILTIN_CMASK8, void_ftype_si);
10991 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
10992 SPARC_BUILTIN_CMASK16, void_ftype_si);
10993 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
10994 SPARC_BUILTIN_CMASK32, void_ftype_si);
10997 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
10998 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11000 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11001 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11002 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11003 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11004 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11005 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11006 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11007 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11008 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11009 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11010 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11011 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11012 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11013 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11014 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11015 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11017 if (TARGET_ARCH64)
11018 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11019 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11020 else
11021 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11022 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11024 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11025 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11026 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11027 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11028 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11029 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11031 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11032 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11033 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11034 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11035 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11036 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11037 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11038 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11039 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11040 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11041 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11042 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11043 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11044 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11045 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11046 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11048 if (TARGET_ARCH64)
11050 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11051 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11052 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11053 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11054 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11055 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11056 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11057 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11059 else
11061 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11062 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11063 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11064 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11065 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11066 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11067 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11068 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11071 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11072 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11073 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11074 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11075 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11076 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11077 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11078 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11079 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11080 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11081 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11082 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11084 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11085 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11086 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11087 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11088 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11089 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11092 if (TARGET_VIS4)
11094 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11095 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11096 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11097 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11098 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11099 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11100 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11101 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11104 if (TARGET_ARCH64)
11106 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11107 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11108 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11109 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11110 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11111 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11112 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11113 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11114 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11115 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11116 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11117 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11119 else
11121 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11122 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11123 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11124 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11125 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11126 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11127 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11128 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11129 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11130 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11131 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11132 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11135 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11136 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11137 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11138 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11139 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11140 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11141 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11142 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11143 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11144 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11145 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11146 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11147 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11148 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11149 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11150 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11151 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11152 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11153 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11154 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11155 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11156 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11157 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11158 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11159 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11160 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11161 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11162 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11163 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11164 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11165 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11166 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11169 if (TARGET_VIS4B)
11171 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11172 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11173 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11174 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11175 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11176 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11178 if (TARGET_ARCH64)
11180 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11181 v8qi, v8qi,
11182 intSI_type_node, 0);
11183 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11184 v4hi, v4hi,
11185 intSI_type_node, 0);
11186 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11187 v2si, v2si,
11188 intSI_type_node, 0);
11190 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11191 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11192 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11193 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11194 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11195 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11196 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11197 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11199 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11200 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11201 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11202 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11203 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11204 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11205 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11206 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11208 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11209 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11210 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11211 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11212 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11213 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11214 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11215 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11218 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11219 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11220 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11221 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11223 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11224 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11225 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11226 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11228 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11229 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11230 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11231 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11233 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11234 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11235 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11236 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11237 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11238 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11240 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11241 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11242 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11243 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11244 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11245 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11248 else
11250 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11251 v8qi, v8qi,
11252 intSI_type_node, 0);
11253 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11254 v4hi, v4hi,
11255 intSI_type_node, 0);
11256 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11257 v2si, v2si,
11258 intSI_type_node, 0);
11260 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11261 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11262 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11263 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11264 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11265 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11266 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11267 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11269 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11270 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11271 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11272 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11273 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11274 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11275 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11276 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11278 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11279 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11280 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11281 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11282 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11283 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11284 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11285 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11288 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11289 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11290 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11291 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11293 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11294 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11295 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11296 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11298 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11299 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11300 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11301 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11303 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11304 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11305 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11306 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11307 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11308 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11310 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11311 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11312 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11313 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11314 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11315 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11320 /* Implement TARGET_BUILTIN_DECL hook. */
11322 static tree
11323 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11325 if (code >= SPARC_BUILTIN_MAX)
11326 return error_mark_node;
11328 return sparc_builtins[code];
11331 /* Implemented TARGET_EXPAND_BUILTIN hook. */
11333 static rtx
11334 sparc_expand_builtin (tree exp, rtx target,
11335 rtx subtarget ATTRIBUTE_UNUSED,
11336 machine_mode tmode ATTRIBUTE_UNUSED,
11337 int ignore ATTRIBUTE_UNUSED)
11339 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11340 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11341 enum insn_code icode = sparc_builtins_icode[code];
11342 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11343 call_expr_arg_iterator iter;
11344 int arg_count = 0;
11345 rtx pat, op[4];
11346 tree arg;
11348 if (nonvoid)
11350 machine_mode tmode = insn_data[icode].operand[0].mode;
11351 if (!target
11352 || GET_MODE (target) != tmode
11353 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11354 op[0] = gen_reg_rtx (tmode);
11355 else
11356 op[0] = target;
11359 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11361 const struct insn_operand_data *insn_op;
11362 int idx;
11364 if (arg == error_mark_node)
11365 return NULL_RTX;
11367 arg_count++;
11368 idx = arg_count - !nonvoid;
11369 insn_op = &insn_data[icode].operand[idx];
11370 op[arg_count] = expand_normal (arg);
11372 /* Some of the builtins require constant arguments. We check
11373 for this here. */
11374 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11375 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11376 && arg_count == 3)
11377 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11378 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11379 && arg_count == 2))
11381 if (!check_constant_argument (icode, idx, op[arg_count]))
11382 return const0_rtx;
11385 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11387 if (!address_operand (op[arg_count], SImode))
11389 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11390 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11392 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11395 else if (insn_op->mode == V1DImode
11396 && GET_MODE (op[arg_count]) == DImode)
11397 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11399 else if (insn_op->mode == V1SImode
11400 && GET_MODE (op[arg_count]) == SImode)
11401 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11403 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11404 insn_op->mode))
11405 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11408 switch (arg_count)
11410 case 0:
11411 pat = GEN_FCN (icode) (op[0]);
11412 break;
11413 case 1:
11414 if (nonvoid)
11415 pat = GEN_FCN (icode) (op[0], op[1]);
11416 else
11417 pat = GEN_FCN (icode) (op[1]);
11418 break;
11419 case 2:
11420 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11421 break;
11422 case 3:
11423 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11424 break;
11425 default:
11426 gcc_unreachable ();
11429 if (!pat)
11430 return NULL_RTX;
11432 emit_insn (pat);
11434 return (nonvoid ? op[0] : const0_rtx);
11437 /* Return the upper 16 bits of the 8x16 multiplication. */
11439 static int
11440 sparc_vis_mul8x16 (int e8, int e16)
11442 return (e8 * e16 + 128) / 256;
11445 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11446 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11448 static void
11449 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11450 tree inner_type, tree cst0, tree cst1)
11452 unsigned i, num = VECTOR_CST_NELTS (cst0);
11453 int scale;
11455 switch (fncode)
11457 case SPARC_BUILTIN_FMUL8X16:
11458 for (i = 0; i < num; ++i)
11460 int val
11461 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11462 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11463 n_elts->quick_push (build_int_cst (inner_type, val));
11465 break;
11467 case SPARC_BUILTIN_FMUL8X16AU:
11468 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11470 for (i = 0; i < num; ++i)
11472 int val
11473 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11474 scale);
11475 n_elts->quick_push (build_int_cst (inner_type, val));
11477 break;
11479 case SPARC_BUILTIN_FMUL8X16AL:
11480 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11482 for (i = 0; i < num; ++i)
11484 int val
11485 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11486 scale);
11487 n_elts->quick_push (build_int_cst (inner_type, val));
11489 break;
11491 default:
11492 gcc_unreachable ();
11496 /* Implement TARGET_FOLD_BUILTIN hook.
11498 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11499 result of the function call is ignored. NULL_TREE is returned if the
11500 function could not be folded. */
11502 static tree
11503 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11504 tree *args, bool ignore)
11506 enum sparc_builtins code = (enum sparc_builtins) DECL_FUNCTION_CODE (fndecl);
11507 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11508 tree arg0, arg1, arg2;
11510 if (ignore)
11511 switch (code)
11513 case SPARC_BUILTIN_LDFSR:
11514 case SPARC_BUILTIN_STFSR:
11515 case SPARC_BUILTIN_ALIGNADDR:
11516 case SPARC_BUILTIN_WRGSR:
11517 case SPARC_BUILTIN_BMASK:
11518 case SPARC_BUILTIN_CMASK8:
11519 case SPARC_BUILTIN_CMASK16:
11520 case SPARC_BUILTIN_CMASK32:
11521 break;
11523 default:
11524 return build_zero_cst (rtype);
11527 switch (code)
11529 case SPARC_BUILTIN_FEXPAND:
11530 arg0 = args[0];
11531 STRIP_NOPS (arg0);
11533 if (TREE_CODE (arg0) == VECTOR_CST)
11535 tree inner_type = TREE_TYPE (rtype);
11536 unsigned i;
11538 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11539 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11541 unsigned HOST_WIDE_INT val
11542 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11543 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11545 return build_vector (rtype, n_elts);
11547 break;
11549 case SPARC_BUILTIN_FMUL8X16:
11550 case SPARC_BUILTIN_FMUL8X16AU:
11551 case SPARC_BUILTIN_FMUL8X16AL:
11552 arg0 = args[0];
11553 arg1 = args[1];
11554 STRIP_NOPS (arg0);
11555 STRIP_NOPS (arg1);
11557 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11559 tree inner_type = TREE_TYPE (rtype);
11560 auto_vec<tree, 32> n_elts (VECTOR_CST_NELTS (arg0));
11561 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11562 return build_vector (rtype, n_elts);
11564 break;
11566 case SPARC_BUILTIN_FPMERGE:
11567 arg0 = args[0];
11568 arg1 = args[1];
11569 STRIP_NOPS (arg0);
11570 STRIP_NOPS (arg1);
11572 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11574 auto_vec<tree, 32> n_elts (2 * VECTOR_CST_NELTS (arg0));
11575 unsigned i;
11576 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11578 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11579 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11582 return build_vector (rtype, n_elts);
11584 break;
11586 case SPARC_BUILTIN_PDIST:
11587 case SPARC_BUILTIN_PDISTN:
11588 arg0 = args[0];
11589 arg1 = args[1];
11590 STRIP_NOPS (arg0);
11591 STRIP_NOPS (arg1);
11592 if (code == SPARC_BUILTIN_PDIST)
11594 arg2 = args[2];
11595 STRIP_NOPS (arg2);
11597 else
11598 arg2 = integer_zero_node;
11600 if (TREE_CODE (arg0) == VECTOR_CST
11601 && TREE_CODE (arg1) == VECTOR_CST
11602 && TREE_CODE (arg2) == INTEGER_CST)
11604 bool overflow = false;
11605 widest_int result = wi::to_widest (arg2);
11606 widest_int tmp;
11607 unsigned i;
11609 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11611 tree e0 = VECTOR_CST_ELT (arg0, i);
11612 tree e1 = VECTOR_CST_ELT (arg1, i);
11614 bool neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11616 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11617 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11618 if (wi::neg_p (tmp))
11619 tmp = wi::neg (tmp, &neg2_ovf);
11620 else
11621 neg2_ovf = false;
11622 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11623 overflow |= neg1_ovf | neg2_ovf | add1_ovf | add2_ovf;
11626 gcc_assert (!overflow);
11628 return wide_int_to_tree (rtype, result);
11631 default:
11632 break;
11635 return NULL_TREE;
11638 /* ??? This duplicates information provided to the compiler by the
11639 ??? scheduler description. Some day, teach genautomata to output
11640 ??? the latencies and then CSE will just use that. */
11642 static bool
11643 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11644 int opno ATTRIBUTE_UNUSED,
11645 int *total, bool speed ATTRIBUTE_UNUSED)
11647 int code = GET_CODE (x);
11648 bool float_mode_p = FLOAT_MODE_P (mode);
11650 switch (code)
11652 case CONST_INT:
11653 if (SMALL_INT (x))
11654 *total = 0;
11655 else
11656 *total = 2;
11657 return true;
11659 case CONST_WIDE_INT:
11660 *total = 0;
11661 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
11662 *total += 2;
11663 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
11664 *total += 2;
11665 return true;
11667 case HIGH:
11668 *total = 2;
11669 return true;
11671 case CONST:
11672 case LABEL_REF:
11673 case SYMBOL_REF:
11674 *total = 4;
11675 return true;
11677 case CONST_DOUBLE:
11678 *total = 8;
11679 return true;
11681 case MEM:
11682 /* If outer-code was a sign or zero extension, a cost
11683 of COSTS_N_INSNS (1) was already added in. This is
11684 why we are subtracting it back out. */
11685 if (outer_code == ZERO_EXTEND)
11687 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
11689 else if (outer_code == SIGN_EXTEND)
11691 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
11693 else if (float_mode_p)
11695 *total = sparc_costs->float_load;
11697 else
11699 *total = sparc_costs->int_load;
11702 return true;
11704 case PLUS:
11705 case MINUS:
11706 if (float_mode_p)
11707 *total = sparc_costs->float_plusminus;
11708 else
11709 *total = COSTS_N_INSNS (1);
11710 return false;
11712 case FMA:
11714 rtx sub;
11716 gcc_assert (float_mode_p);
11717 *total = sparc_costs->float_mul;
11719 sub = XEXP (x, 0);
11720 if (GET_CODE (sub) == NEG)
11721 sub = XEXP (sub, 0);
11722 *total += rtx_cost (sub, mode, FMA, 0, speed);
11724 sub = XEXP (x, 2);
11725 if (GET_CODE (sub) == NEG)
11726 sub = XEXP (sub, 0);
11727 *total += rtx_cost (sub, mode, FMA, 2, speed);
11728 return true;
11731 case MULT:
11732 if (float_mode_p)
11733 *total = sparc_costs->float_mul;
11734 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
11735 *total = COSTS_N_INSNS (25);
11736 else
11738 int bit_cost;
11740 bit_cost = 0;
11741 if (sparc_costs->int_mul_bit_factor)
11743 int nbits;
11745 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
11747 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
11748 for (nbits = 0; value != 0; value &= value - 1)
11749 nbits++;
11751 else
11752 nbits = 7;
11754 if (nbits < 3)
11755 nbits = 3;
11756 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
11757 bit_cost = COSTS_N_INSNS (bit_cost);
11760 if (mode == DImode || !TARGET_HARD_MUL)
11761 *total = sparc_costs->int_mulX + bit_cost;
11762 else
11763 *total = sparc_costs->int_mul + bit_cost;
11765 return false;
11767 case ASHIFT:
11768 case ASHIFTRT:
11769 case LSHIFTRT:
11770 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
11771 return false;
11773 case DIV:
11774 case UDIV:
11775 case MOD:
11776 case UMOD:
11777 if (float_mode_p)
11779 if (mode == DFmode)
11780 *total = sparc_costs->float_div_df;
11781 else
11782 *total = sparc_costs->float_div_sf;
11784 else
11786 if (mode == DImode)
11787 *total = sparc_costs->int_divX;
11788 else
11789 *total = sparc_costs->int_div;
11791 return false;
11793 case NEG:
11794 if (! float_mode_p)
11796 *total = COSTS_N_INSNS (1);
11797 return false;
11799 /* FALLTHRU */
11801 case ABS:
11802 case FLOAT:
11803 case UNSIGNED_FLOAT:
11804 case FIX:
11805 case UNSIGNED_FIX:
11806 case FLOAT_EXTEND:
11807 case FLOAT_TRUNCATE:
11808 *total = sparc_costs->float_move;
11809 return false;
11811 case SQRT:
11812 if (mode == DFmode)
11813 *total = sparc_costs->float_sqrt_df;
11814 else
11815 *total = sparc_costs->float_sqrt_sf;
11816 return false;
11818 case COMPARE:
11819 if (float_mode_p)
11820 *total = sparc_costs->float_cmp;
11821 else
11822 *total = COSTS_N_INSNS (1);
11823 return false;
11825 case IF_THEN_ELSE:
11826 if (float_mode_p)
11827 *total = sparc_costs->float_cmove;
11828 else
11829 *total = sparc_costs->int_cmove;
11830 return false;
11832 case IOR:
11833 /* Handle the NAND vector patterns. */
11834 if (sparc_vector_mode_supported_p (mode)
11835 && GET_CODE (XEXP (x, 0)) == NOT
11836 && GET_CODE (XEXP (x, 1)) == NOT)
11838 *total = COSTS_N_INSNS (1);
11839 return true;
11841 else
11842 return false;
11844 default:
11845 return false;
11849 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
11851 static inline bool
11852 general_or_i64_p (reg_class_t rclass)
11854 return (rclass == GENERAL_REGS || rclass == I64_REGS);
11857 /* Implement TARGET_REGISTER_MOVE_COST. */
11859 static int
11860 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11861 reg_class_t from, reg_class_t to)
11863 bool need_memory = false;
11865 /* This helps postreload CSE to eliminate redundant comparisons. */
11866 if (from == NO_REGS || to == NO_REGS)
11867 return 100;
11869 if (from == FPCC_REGS || to == FPCC_REGS)
11870 need_memory = true;
11871 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
11872 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
11874 if (TARGET_VIS3)
11876 int size = GET_MODE_SIZE (mode);
11877 if (size == 8 || size == 4)
11879 if (! TARGET_ARCH32 || size == 4)
11880 return 4;
11881 else
11882 return 6;
11885 need_memory = true;
11888 if (need_memory)
11890 if (sparc_cpu == PROCESSOR_ULTRASPARC
11891 || sparc_cpu == PROCESSOR_ULTRASPARC3
11892 || sparc_cpu == PROCESSOR_NIAGARA
11893 || sparc_cpu == PROCESSOR_NIAGARA2
11894 || sparc_cpu == PROCESSOR_NIAGARA3
11895 || sparc_cpu == PROCESSOR_NIAGARA4
11896 || sparc_cpu == PROCESSOR_NIAGARA7
11897 || sparc_cpu == PROCESSOR_M8)
11898 return 12;
11900 return 6;
11903 return 2;
11906 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
11907 This is achieved by means of a manual dynamic stack space allocation in
11908 the current frame. We make the assumption that SEQ doesn't contain any
11909 function calls, with the possible exception of calls to the GOT helper. */
11911 static void
11912 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
11914 /* We must preserve the lowest 16 words for the register save area. */
11915 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
11916 /* We really need only 2 words of fresh stack space. */
11917 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
11919 rtx slot
11920 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
11921 SPARC_STACK_BIAS + offset));
11923 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
11924 emit_insn (gen_rtx_SET (slot, reg));
11925 if (reg2)
11926 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
11927 reg2));
11928 emit_insn (seq);
11929 if (reg2)
11930 emit_insn (gen_rtx_SET (reg2,
11931 adjust_address (slot, word_mode, UNITS_PER_WORD)));
11932 emit_insn (gen_rtx_SET (reg, slot));
11933 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
11936 /* Output the assembler code for a thunk function. THUNK_DECL is the
11937 declaration for the thunk function itself, FUNCTION is the decl for
11938 the target function. DELTA is an immediate constant offset to be
11939 added to THIS. If VCALL_OFFSET is nonzero, the word at address
11940 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
11942 static void
11943 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
11944 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
11945 tree function)
11947 rtx this_rtx, funexp;
11948 rtx_insn *insn;
11949 unsigned int int_arg_first;
11951 reload_completed = 1;
11952 epilogue_completed = 1;
11954 emit_note (NOTE_INSN_PROLOGUE_END);
11956 if (TARGET_FLAT)
11958 sparc_leaf_function_p = 1;
11960 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11962 else if (flag_delayed_branch)
11964 /* We will emit a regular sibcall below, so we need to instruct
11965 output_sibcall that we are in a leaf function. */
11966 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
11968 /* This will cause final.c to invoke leaf_renumber_regs so we
11969 must behave as if we were in a not-yet-leafified function. */
11970 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
11972 else
11974 /* We will emit the sibcall manually below, so we will need to
11975 manually spill non-leaf registers. */
11976 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
11978 /* We really are in a leaf function. */
11979 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
11982 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
11983 returns a structure, the structure return pointer is there instead. */
11984 if (TARGET_ARCH64
11985 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
11986 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
11987 else
11988 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
11990 /* Add DELTA. When possible use a plain add, otherwise load it into
11991 a register first. */
11992 if (delta)
11994 rtx delta_rtx = GEN_INT (delta);
11996 if (! SPARC_SIMM13_P (delta))
11998 rtx scratch = gen_rtx_REG (Pmode, 1);
11999 emit_move_insn (scratch, delta_rtx);
12000 delta_rtx = scratch;
12003 /* THIS_RTX += DELTA. */
12004 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12007 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12008 if (vcall_offset)
12010 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12011 rtx scratch = gen_rtx_REG (Pmode, 1);
12013 gcc_assert (vcall_offset < 0);
12015 /* SCRATCH = *THIS_RTX. */
12016 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12018 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12019 may not have any available scratch register at this point. */
12020 if (SPARC_SIMM13_P (vcall_offset))
12022 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12023 else if (! fixed_regs[5]
12024 /* The below sequence is made up of at least 2 insns,
12025 while the default method may need only one. */
12026 && vcall_offset < -8192)
12028 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12029 emit_move_insn (scratch2, vcall_offset_rtx);
12030 vcall_offset_rtx = scratch2;
12032 else
12034 rtx increment = GEN_INT (-4096);
12036 /* VCALL_OFFSET is a negative number whose typical range can be
12037 estimated as -32768..0 in 32-bit mode. In almost all cases
12038 it is therefore cheaper to emit multiple add insns than
12039 spilling and loading the constant into a register (at least
12040 6 insns). */
12041 while (! SPARC_SIMM13_P (vcall_offset))
12043 emit_insn (gen_add2_insn (scratch, increment));
12044 vcall_offset += 4096;
12046 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12049 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12050 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12051 gen_rtx_PLUS (Pmode,
12052 scratch,
12053 vcall_offset_rtx)));
12055 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12056 emit_insn (gen_add2_insn (this_rtx, scratch));
12059 /* Generate a tail call to the target function. */
12060 if (! TREE_USED (function))
12062 assemble_external (function);
12063 TREE_USED (function) = 1;
12065 funexp = XEXP (DECL_RTL (function), 0);
12067 if (flag_delayed_branch)
12069 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12070 insn = emit_call_insn (gen_sibcall (funexp));
12071 SIBLING_CALL_P (insn) = 1;
12073 else
12075 /* The hoops we have to jump through in order to generate a sibcall
12076 without using delay slots... */
12077 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12079 if (flag_pic)
12081 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12082 start_sequence ();
12083 load_got_register (); /* clobbers %o7 */
12084 scratch = sparc_legitimize_pic_address (funexp, scratch);
12085 seq = get_insns ();
12086 end_sequence ();
12087 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12089 else if (TARGET_ARCH32)
12091 emit_insn (gen_rtx_SET (scratch,
12092 gen_rtx_HIGH (SImode, funexp)));
12093 emit_insn (gen_rtx_SET (scratch,
12094 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12096 else /* TARGET_ARCH64 */
12098 switch (sparc_cmodel)
12100 case CM_MEDLOW:
12101 case CM_MEDMID:
12102 /* The destination can serve as a temporary. */
12103 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12104 break;
12106 case CM_MEDANY:
12107 case CM_EMBMEDANY:
12108 /* The destination cannot serve as a temporary. */
12109 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12110 start_sequence ();
12111 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12112 seq = get_insns ();
12113 end_sequence ();
12114 emit_and_preserve (seq, spill_reg, 0);
12115 break;
12117 default:
12118 gcc_unreachable ();
12122 emit_jump_insn (gen_indirect_jump (scratch));
12125 emit_barrier ();
12127 /* Run just enough of rest_of_compilation to get the insns emitted.
12128 There's not really enough bulk here to make other passes such as
12129 instruction scheduling worth while. Note that use_thunk calls
12130 assemble_start_function and assemble_end_function. */
12131 insn = get_insns ();
12132 shorten_branches (insn);
12133 final_start_function (insn, file, 1);
12134 final (insn, file, 1);
12135 final_end_function ();
12137 reload_completed = 0;
12138 epilogue_completed = 0;
12141 /* Return true if sparc_output_mi_thunk would be able to output the
12142 assembler code for the thunk function specified by the arguments
12143 it is passed, and false otherwise. */
12144 static bool
12145 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12146 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12147 HOST_WIDE_INT vcall_offset,
12148 const_tree function ATTRIBUTE_UNUSED)
12150 /* Bound the loop used in the default method above. */
12151 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12154 /* How to allocate a 'struct machine_function'. */
12156 static struct machine_function *
12157 sparc_init_machine_status (void)
12159 return ggc_cleared_alloc<machine_function> ();
12162 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12163 We need to emit DTP-relative relocations. */
12165 static void
12166 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12168 switch (size)
12170 case 4:
12171 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12172 break;
12173 case 8:
12174 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12175 break;
12176 default:
12177 gcc_unreachable ();
12179 output_addr_const (file, x);
12180 fputs (")", file);
12183 /* Do whatever processing is required at the end of a file. */
12185 static void
12186 sparc_file_end (void)
12188 /* If we need to emit the special GOT helper function, do so now. */
12189 if (got_helper_rtx)
12191 const char *name = XSTR (got_helper_rtx, 0);
12192 const char *reg_name = reg_names[GLOBAL_OFFSET_TABLE_REGNUM];
12193 #ifdef DWARF2_UNWIND_INFO
12194 bool do_cfi;
12195 #endif
12197 if (USE_HIDDEN_LINKONCE)
12199 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12200 get_identifier (name),
12201 build_function_type_list (void_type_node,
12202 NULL_TREE));
12203 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12204 NULL_TREE, void_type_node);
12205 TREE_PUBLIC (decl) = 1;
12206 TREE_STATIC (decl) = 1;
12207 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12208 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12209 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12210 resolve_unique_section (decl, 0, flag_function_sections);
12211 allocate_struct_function (decl, true);
12212 cfun->is_thunk = 1;
12213 current_function_decl = decl;
12214 init_varasm_status ();
12215 assemble_start_function (decl, name);
12217 else
12219 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12220 switch_to_section (text_section);
12221 if (align > 0)
12222 ASM_OUTPUT_ALIGN (asm_out_file, align);
12223 ASM_OUTPUT_LABEL (asm_out_file, name);
12226 #ifdef DWARF2_UNWIND_INFO
12227 do_cfi = dwarf2out_do_cfi_asm ();
12228 if (do_cfi)
12229 fprintf (asm_out_file, "\t.cfi_startproc\n");
12230 #endif
12231 if (flag_delayed_branch)
12232 fprintf (asm_out_file, "\tjmp\t%%o7+8\n\t add\t%%o7, %s, %s\n",
12233 reg_name, reg_name);
12234 else
12235 fprintf (asm_out_file, "\tadd\t%%o7, %s, %s\n\tjmp\t%%o7+8\n\t nop\n",
12236 reg_name, reg_name);
12237 #ifdef DWARF2_UNWIND_INFO
12238 if (do_cfi)
12239 fprintf (asm_out_file, "\t.cfi_endproc\n");
12240 #endif
12243 if (NEED_INDICATE_EXEC_STACK)
12244 file_end_indicate_exec_stack ();
12246 #ifdef TARGET_SOLARIS
12247 solaris_file_end ();
12248 #endif
12251 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12252 /* Implement TARGET_MANGLE_TYPE. */
12254 static const char *
12255 sparc_mangle_type (const_tree type)
12257 if (TARGET_ARCH32
12258 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12259 && TARGET_LONG_DOUBLE_128)
12260 return "g";
12262 /* For all other types, use normal C++ mangling. */
12263 return NULL;
12265 #endif
12267 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12268 and BEFORE_AFTER arguments of the form X_Y. They are two-bit masks where
12269 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12271 void
12272 sparc_emit_membar_for_model (enum memmodel model,
12273 int load_store, int before_after)
12275 /* Bits for the MEMBAR mmask field. */
12276 const int LoadLoad = 1;
12277 const int StoreLoad = 2;
12278 const int LoadStore = 4;
12279 const int StoreStore = 8;
12281 int mm = 0, implied = 0;
12283 switch (sparc_memory_model)
12285 case SMM_SC:
12286 /* Sequential Consistency. All memory transactions are immediately
12287 visible in sequential execution order. No barriers needed. */
12288 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12289 break;
12291 case SMM_TSO:
12292 /* Total Store Ordering: all memory transactions with store semantics
12293 are followed by an implied StoreStore. */
12294 implied |= StoreStore;
12296 /* If we're not looking for a raw barrer (before+after), then atomic
12297 operations get the benefit of being both load and store. */
12298 if (load_store == 3 && before_after == 1)
12299 implied |= StoreLoad;
12300 /* FALLTHRU */
12302 case SMM_PSO:
12303 /* Partial Store Ordering: all memory transactions with load semantics
12304 are followed by an implied LoadLoad | LoadStore. */
12305 implied |= LoadLoad | LoadStore;
12307 /* If we're not looking for a raw barrer (before+after), then atomic
12308 operations get the benefit of being both load and store. */
12309 if (load_store == 3 && before_after == 2)
12310 implied |= StoreLoad | StoreStore;
12311 /* FALLTHRU */
12313 case SMM_RMO:
12314 /* Relaxed Memory Ordering: no implicit bits. */
12315 break;
12317 default:
12318 gcc_unreachable ();
12321 if (before_after & 1)
12323 if (is_mm_release (model) || is_mm_acq_rel (model)
12324 || is_mm_seq_cst (model))
12326 if (load_store & 1)
12327 mm |= LoadLoad | StoreLoad;
12328 if (load_store & 2)
12329 mm |= LoadStore | StoreStore;
12332 if (before_after & 2)
12334 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12335 || is_mm_seq_cst (model))
12337 if (load_store & 1)
12338 mm |= LoadLoad | LoadStore;
12339 if (load_store & 2)
12340 mm |= StoreLoad | StoreStore;
12344 /* Remove the bits implied by the system memory model. */
12345 mm &= ~implied;
12347 /* For raw barriers (before+after), always emit a barrier.
12348 This will become a compile-time barrier if needed. */
12349 if (mm || before_after == 3)
12350 emit_insn (gen_membar (GEN_INT (mm)));
12353 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
12354 compare and swap on the word containing the byte or half-word. */
12356 static void
12357 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12358 rtx oldval, rtx newval)
12360 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12361 rtx addr = gen_reg_rtx (Pmode);
12362 rtx off = gen_reg_rtx (SImode);
12363 rtx oldv = gen_reg_rtx (SImode);
12364 rtx newv = gen_reg_rtx (SImode);
12365 rtx oldvalue = gen_reg_rtx (SImode);
12366 rtx newvalue = gen_reg_rtx (SImode);
12367 rtx res = gen_reg_rtx (SImode);
12368 rtx resv = gen_reg_rtx (SImode);
12369 rtx memsi, val, mask, cc;
12371 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12373 if (Pmode != SImode)
12374 addr1 = gen_lowpart (SImode, addr1);
12375 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12377 memsi = gen_rtx_MEM (SImode, addr);
12378 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12379 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12381 val = copy_to_reg (memsi);
12383 emit_insn (gen_rtx_SET (off,
12384 gen_rtx_XOR (SImode, off,
12385 GEN_INT (GET_MODE (mem) == QImode
12386 ? 3 : 2))));
12388 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12390 if (GET_MODE (mem) == QImode)
12391 mask = force_reg (SImode, GEN_INT (0xff));
12392 else
12393 mask = force_reg (SImode, GEN_INT (0xffff));
12395 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12397 emit_insn (gen_rtx_SET (val,
12398 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12399 val)));
12401 oldval = gen_lowpart (SImode, oldval);
12402 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12404 newval = gen_lowpart_common (SImode, newval);
12405 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12407 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12409 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12411 rtx_code_label *end_label = gen_label_rtx ();
12412 rtx_code_label *loop_label = gen_label_rtx ();
12413 emit_label (loop_label);
12415 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12417 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12419 emit_move_insn (bool_result, const1_rtx);
12421 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12423 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12425 emit_insn (gen_rtx_SET (resv,
12426 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12427 res)));
12429 emit_move_insn (bool_result, const0_rtx);
12431 cc = gen_compare_reg_1 (NE, resv, val);
12432 emit_insn (gen_rtx_SET (val, resv));
12434 /* Use cbranchcc4 to separate the compare and branch! */
12435 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12436 cc, const0_rtx, loop_label));
12438 emit_label (end_label);
12440 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12442 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12444 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12447 /* Expand code to perform a compare-and-swap. */
12449 void
12450 sparc_expand_compare_and_swap (rtx operands[])
12452 rtx bval, retval, mem, oldval, newval;
12453 machine_mode mode;
12454 enum memmodel model;
12456 bval = operands[0];
12457 retval = operands[1];
12458 mem = operands[2];
12459 oldval = operands[3];
12460 newval = operands[4];
12461 model = (enum memmodel) INTVAL (operands[6]);
12462 mode = GET_MODE (mem);
12464 sparc_emit_membar_for_model (model, 3, 1);
12466 if (reg_overlap_mentioned_p (retval, oldval))
12467 oldval = copy_to_reg (oldval);
12469 if (mode == QImode || mode == HImode)
12470 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12471 else
12473 rtx (*gen) (rtx, rtx, rtx, rtx);
12474 rtx x;
12476 if (mode == SImode)
12477 gen = gen_atomic_compare_and_swapsi_1;
12478 else
12479 gen = gen_atomic_compare_and_swapdi_1;
12480 emit_insn (gen (retval, mem, oldval, newval));
12482 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12483 if (x != bval)
12484 convert_move (bval, x, 1);
12487 sparc_emit_membar_for_model (model, 3, 2);
12490 void
12491 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12493 rtx t_1, t_2, t_3;
12495 sel = gen_lowpart (DImode, sel);
12496 switch (vmode)
12498 case E_V2SImode:
12499 /* inp = xxxxxxxAxxxxxxxB */
12500 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12501 NULL_RTX, 1, OPTAB_DIRECT);
12502 /* t_1 = ....xxxxxxxAxxx. */
12503 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12504 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12505 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12506 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12507 /* sel = .......B */
12508 /* t_1 = ...A.... */
12509 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12510 /* sel = ...A...B */
12511 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12512 /* sel = AAAABBBB * 4 */
12513 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12514 /* sel = { A*4, A*4+1, A*4+2, ... } */
12515 break;
12517 case E_V4HImode:
12518 /* inp = xxxAxxxBxxxCxxxD */
12519 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12520 NULL_RTX, 1, OPTAB_DIRECT);
12521 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12522 NULL_RTX, 1, OPTAB_DIRECT);
12523 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12524 NULL_RTX, 1, OPTAB_DIRECT);
12525 /* t_1 = ..xxxAxxxBxxxCxx */
12526 /* t_2 = ....xxxAxxxBxxxC */
12527 /* t_3 = ......xxxAxxxBxx */
12528 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12529 GEN_INT (0x07),
12530 NULL_RTX, 1, OPTAB_DIRECT);
12531 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12532 GEN_INT (0x0700),
12533 NULL_RTX, 1, OPTAB_DIRECT);
12534 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12535 GEN_INT (0x070000),
12536 NULL_RTX, 1, OPTAB_DIRECT);
12537 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12538 GEN_INT (0x07000000),
12539 NULL_RTX, 1, OPTAB_DIRECT);
12540 /* sel = .......D */
12541 /* t_1 = .....C.. */
12542 /* t_2 = ...B.... */
12543 /* t_3 = .A...... */
12544 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12545 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12546 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12547 /* sel = .A.B.C.D */
12548 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12549 /* sel = AABBCCDD * 2 */
12550 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12551 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12552 break;
12554 case E_V8QImode:
12555 /* input = xAxBxCxDxExFxGxH */
12556 sel = expand_simple_binop (DImode, AND, sel,
12557 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12558 | 0x0f0f0f0f),
12559 NULL_RTX, 1, OPTAB_DIRECT);
12560 /* sel = .A.B.C.D.E.F.G.H */
12561 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12562 NULL_RTX, 1, OPTAB_DIRECT);
12563 /* t_1 = ..A.B.C.D.E.F.G. */
12564 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12565 NULL_RTX, 1, OPTAB_DIRECT);
12566 /* sel = .AABBCCDDEEFFGGH */
12567 sel = expand_simple_binop (DImode, AND, sel,
12568 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12569 | 0xff00ff),
12570 NULL_RTX, 1, OPTAB_DIRECT);
12571 /* sel = ..AB..CD..EF..GH */
12572 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12573 NULL_RTX, 1, OPTAB_DIRECT);
12574 /* t_1 = ....AB..CD..EF.. */
12575 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12576 NULL_RTX, 1, OPTAB_DIRECT);
12577 /* sel = ..ABABCDCDEFEFGH */
12578 sel = expand_simple_binop (DImode, AND, sel,
12579 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12580 NULL_RTX, 1, OPTAB_DIRECT);
12581 /* sel = ....ABCD....EFGH */
12582 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12583 NULL_RTX, 1, OPTAB_DIRECT);
12584 /* t_1 = ........ABCD.... */
12585 sel = gen_lowpart (SImode, sel);
12586 t_1 = gen_lowpart (SImode, t_1);
12587 break;
12589 default:
12590 gcc_unreachable ();
12593 /* Always perform the final addition/merge within the bmask insn. */
12594 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12597 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12599 static bool
12600 sparc_frame_pointer_required (void)
12602 /* If the stack pointer is dynamically modified in the function, it cannot
12603 serve as the frame pointer. */
12604 if (cfun->calls_alloca)
12605 return true;
12607 /* If the function receives nonlocal gotos, it needs to save the frame
12608 pointer in the nonlocal_goto_save_area object. */
12609 if (cfun->has_nonlocal_label)
12610 return true;
12612 /* In flat mode, that's it. */
12613 if (TARGET_FLAT)
12614 return false;
12616 /* Otherwise, the frame pointer is required if the function isn't leaf, but
12617 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
12618 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
12621 /* The way this is structured, we can't eliminate SFP in favor of SP
12622 if the frame pointer is required: we want to use the SFP->HFP elimination
12623 in that case. But the test in update_eliminables doesn't know we are
12624 assuming below that we only do the former elimination. */
12626 static bool
12627 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
12629 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
12632 /* Return the hard frame pointer directly to bypass the stack bias. */
12634 static rtx
12635 sparc_builtin_setjmp_frame_value (void)
12637 return hard_frame_pointer_rtx;
12640 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
12641 they won't be allocated. */
12643 static void
12644 sparc_conditional_register_usage (void)
12646 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
12648 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12649 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
12651 /* If the user has passed -f{fixed,call-{used,saved}}-g5 */
12652 /* then honor it. */
12653 if (TARGET_ARCH32 && fixed_regs[5])
12654 fixed_regs[5] = 1;
12655 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
12656 fixed_regs[5] = 0;
12657 if (! TARGET_V9)
12659 int regno;
12660 for (regno = SPARC_FIRST_V9_FP_REG;
12661 regno <= SPARC_LAST_V9_FP_REG;
12662 regno++)
12663 fixed_regs[regno] = 1;
12664 /* %fcc0 is used by v8 and v9. */
12665 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
12666 regno <= SPARC_LAST_V9_FCC_REG;
12667 regno++)
12668 fixed_regs[regno] = 1;
12670 if (! TARGET_FPU)
12672 int regno;
12673 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
12674 fixed_regs[regno] = 1;
12676 /* If the user has passed -f{fixed,call-{used,saved}}-g2 */
12677 /* then honor it. Likewise with g3 and g4. */
12678 if (fixed_regs[2] == 2)
12679 fixed_regs[2] = ! TARGET_APP_REGS;
12680 if (fixed_regs[3] == 2)
12681 fixed_regs[3] = ! TARGET_APP_REGS;
12682 if (TARGET_ARCH32 && fixed_regs[4] == 2)
12683 fixed_regs[4] = ! TARGET_APP_REGS;
12684 else if (TARGET_CM_EMBMEDANY)
12685 fixed_regs[4] = 1;
12686 else if (fixed_regs[4] == 2)
12687 fixed_regs[4] = 0;
12688 if (TARGET_FLAT)
12690 int regno;
12691 /* Disable leaf functions. */
12692 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
12693 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
12694 leaf_reg_remap [regno] = regno;
12696 if (TARGET_VIS)
12697 global_regs[SPARC_GSR_REG] = 1;
12700 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
12702 - We can't load constants into FP registers.
12703 - We can't load FP constants into integer registers when soft-float,
12704 because there is no soft-float pattern with a r/F constraint.
12705 - We can't load FP constants into integer registers for TFmode unless
12706 it is 0.0L, because there is no movtf pattern with a r/F constraint.
12707 - Try and reload integer constants (symbolic or otherwise) back into
12708 registers directly, rather than having them dumped to memory. */
12710 static reg_class_t
12711 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
12713 machine_mode mode = GET_MODE (x);
12714 if (CONSTANT_P (x))
12716 if (FP_REG_CLASS_P (rclass)
12717 || rclass == GENERAL_OR_FP_REGS
12718 || rclass == GENERAL_OR_EXTRA_FP_REGS
12719 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
12720 || (mode == TFmode && ! const_zero_operand (x, mode)))
12721 return NO_REGS;
12723 if (GET_MODE_CLASS (mode) == MODE_INT)
12724 return GENERAL_REGS;
12726 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
12728 if (! FP_REG_CLASS_P (rclass)
12729 || !(const_zero_operand (x, mode)
12730 || const_all_ones_operand (x, mode)))
12731 return NO_REGS;
12735 if (TARGET_VIS3
12736 && ! TARGET_ARCH64
12737 && (rclass == EXTRA_FP_REGS
12738 || rclass == GENERAL_OR_EXTRA_FP_REGS))
12740 int regno = true_regnum (x);
12742 if (SPARC_INT_REG_P (regno))
12743 return (rclass == EXTRA_FP_REGS
12744 ? FP_REGS : GENERAL_OR_FP_REGS);
12747 return rclass;
12750 /* Return true if we use LRA instead of reload pass. */
12752 static bool
12753 sparc_lra_p (void)
12755 return TARGET_LRA;
12758 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
12759 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
12761 const char *
12762 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
12764 char mulstr[32];
12766 gcc_assert (! TARGET_ARCH64);
12768 if (sparc_check_64 (operands[1], insn) <= 0)
12769 output_asm_insn ("srl\t%L1, 0, %L1", operands);
12770 if (which_alternative == 1)
12771 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
12772 if (GET_CODE (operands[2]) == CONST_INT)
12774 if (which_alternative == 1)
12776 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12777 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
12778 output_asm_insn (mulstr, operands);
12779 return "srlx\t%L0, 32, %H0";
12781 else
12783 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12784 output_asm_insn ("or\t%L1, %3, %3", operands);
12785 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
12786 output_asm_insn (mulstr, operands);
12787 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12788 return "mov\t%3, %L0";
12791 else if (rtx_equal_p (operands[1], operands[2]))
12793 if (which_alternative == 1)
12795 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12796 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
12797 output_asm_insn (mulstr, operands);
12798 return "srlx\t%L0, 32, %H0";
12800 else
12802 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12803 output_asm_insn ("or\t%L1, %3, %3", operands);
12804 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
12805 output_asm_insn (mulstr, operands);
12806 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12807 return "mov\t%3, %L0";
12810 if (sparc_check_64 (operands[2], insn) <= 0)
12811 output_asm_insn ("srl\t%L2, 0, %L2", operands);
12812 if (which_alternative == 1)
12814 output_asm_insn ("or\t%L1, %H1, %H1", operands);
12815 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
12816 output_asm_insn ("or\t%L2, %L1, %L1", operands);
12817 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
12818 output_asm_insn (mulstr, operands);
12819 return "srlx\t%L0, 32, %H0";
12821 else
12823 output_asm_insn ("sllx\t%H1, 32, %3", operands);
12824 output_asm_insn ("sllx\t%H2, 32, %4", operands);
12825 output_asm_insn ("or\t%L1, %3, %3", operands);
12826 output_asm_insn ("or\t%L2, %4, %4", operands);
12827 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
12828 output_asm_insn (mulstr, operands);
12829 output_asm_insn ("srlx\t%3, 32, %H0", operands);
12830 return "mov\t%3, %L0";
12834 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12835 all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn. MODE
12836 and INNER_MODE are the modes describing TARGET. */
12838 static void
12839 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
12840 machine_mode inner_mode)
12842 rtx t1, final_insn, sel;
12843 int bmask;
12845 t1 = gen_reg_rtx (mode);
12847 elt = convert_modes (SImode, inner_mode, elt, true);
12848 emit_move_insn (gen_lowpart(SImode, t1), elt);
12850 switch (mode)
12852 case E_V2SImode:
12853 final_insn = gen_bshufflev2si_vis (target, t1, t1);
12854 bmask = 0x45674567;
12855 break;
12856 case E_V4HImode:
12857 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
12858 bmask = 0x67676767;
12859 break;
12860 case E_V8QImode:
12861 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
12862 bmask = 0x77777777;
12863 break;
12864 default:
12865 gcc_unreachable ();
12868 sel = force_reg (SImode, GEN_INT (bmask));
12869 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
12870 emit_insn (final_insn);
12873 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12874 all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn. */
12876 static void
12877 vector_init_fpmerge (rtx target, rtx elt)
12879 rtx t1, t2, t2_low, t3, t3_low;
12881 t1 = gen_reg_rtx (V4QImode);
12882 elt = convert_modes (SImode, QImode, elt, true);
12883 emit_move_insn (gen_lowpart (SImode, t1), elt);
12885 t2 = gen_reg_rtx (V8QImode);
12886 t2_low = gen_lowpart (V4QImode, t2);
12887 emit_insn (gen_fpmerge_vis (t2, t1, t1));
12889 t3 = gen_reg_rtx (V8QImode);
12890 t3_low = gen_lowpart (V4QImode, t3);
12891 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
12893 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
12896 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
12897 all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn. */
12899 static void
12900 vector_init_faligndata (rtx target, rtx elt)
12902 rtx t1 = gen_reg_rtx (V4HImode);
12903 int i;
12905 elt = convert_modes (SImode, HImode, elt, true);
12906 emit_move_insn (gen_lowpart (SImode, t1), elt);
12908 emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
12909 force_reg (SImode, GEN_INT (6)),
12910 const0_rtx));
12912 for (i = 0; i < 4; i++)
12913 emit_insn (gen_faligndatav4hi_vis (target, t1, target));
12916 /* Emit code to initialize TARGET to values for individual fields VALS. */
12918 void
12919 sparc_expand_vector_init (rtx target, rtx vals)
12921 const machine_mode mode = GET_MODE (target);
12922 const machine_mode inner_mode = GET_MODE_INNER (mode);
12923 const int n_elts = GET_MODE_NUNITS (mode);
12924 int i, n_var = 0;
12925 bool all_same = true;
12926 rtx mem;
12928 for (i = 0; i < n_elts; i++)
12930 rtx x = XVECEXP (vals, 0, i);
12931 if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
12932 n_var++;
12934 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12935 all_same = false;
12938 if (n_var == 0)
12940 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
12941 return;
12944 if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
12946 if (GET_MODE_SIZE (inner_mode) == 4)
12948 emit_move_insn (gen_lowpart (SImode, target),
12949 gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
12950 return;
12952 else if (GET_MODE_SIZE (inner_mode) == 8)
12954 emit_move_insn (gen_lowpart (DImode, target),
12955 gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
12956 return;
12959 else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
12960 && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
12962 emit_move_insn (gen_highpart (word_mode, target),
12963 gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
12964 emit_move_insn (gen_lowpart (word_mode, target),
12965 gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
12966 return;
12969 if (all_same && GET_MODE_SIZE (mode) == 8)
12971 if (TARGET_VIS2)
12973 vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
12974 return;
12976 if (mode == V8QImode)
12978 vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
12979 return;
12981 if (mode == V4HImode)
12983 vector_init_faligndata (target, XVECEXP (vals, 0, 0));
12984 return;
12988 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12989 for (i = 0; i < n_elts; i++)
12990 emit_move_insn (adjust_address_nv (mem, inner_mode,
12991 i * GET_MODE_SIZE (inner_mode)),
12992 XVECEXP (vals, 0, i));
12993 emit_move_insn (target, mem);
12996 /* Implement TARGET_SECONDARY_RELOAD. */
12998 static reg_class_t
12999 sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
13000 machine_mode mode, secondary_reload_info *sri)
13002 enum reg_class rclass = (enum reg_class) rclass_i;
13004 sri->icode = CODE_FOR_nothing;
13005 sri->extra_cost = 0;
13007 /* We need a temporary when loading/storing a HImode/QImode value
13008 between memory and the FPU registers. This can happen when combine puts
13009 a paradoxical subreg in a float/fix conversion insn. */
13010 if (FP_REG_CLASS_P (rclass)
13011 && (mode == HImode || mode == QImode)
13012 && (GET_CODE (x) == MEM
13013 || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
13014 && true_regnum (x) == -1)))
13015 return GENERAL_REGS;
13017 /* On 32-bit we need a temporary when loading/storing a DFmode value
13018 between unaligned memory and the upper FPU registers. */
13019 if (TARGET_ARCH32
13020 && rclass == EXTRA_FP_REGS
13021 && mode == DFmode
13022 && GET_CODE (x) == MEM
13023 && ! mem_min_alignment (x, 8))
13024 return FP_REGS;
13026 if (((TARGET_CM_MEDANY
13027 && symbolic_operand (x, mode))
13028 || (TARGET_CM_EMBMEDANY
13029 && text_segment_operand (x, mode)))
13030 && ! flag_pic)
13032 if (in_p)
13033 sri->icode = direct_optab_handler (reload_in_optab, mode);
13034 else
13035 sri->icode = direct_optab_handler (reload_out_optab, mode);
13036 return NO_REGS;
13039 if (TARGET_VIS3 && TARGET_ARCH32)
13041 int regno = true_regnum (x);
13043 /* When using VIS3 fp<-->int register moves, on 32-bit we have
13044 to move 8-byte values in 4-byte pieces. This only works via
13045 FP_REGS, and not via EXTRA_FP_REGS. Therefore if we try to
13046 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
13047 an FP_REGS intermediate move. */
13048 if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
13049 || ((general_or_i64_p (rclass)
13050 || rclass == GENERAL_OR_FP_REGS)
13051 && SPARC_FP_REG_P (regno)))
13053 sri->extra_cost = 2;
13054 return FP_REGS;
13058 return NO_REGS;
13061 /* Implement TARGET_SECONDARY_MEMORY_NEEDED.
13063 On SPARC when not VIS3 it is not possible to directly move data
13064 between GENERAL_REGS and FP_REGS. */
13066 static bool
13067 sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
13068 reg_class_t class2)
13070 return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
13071 && (! TARGET_VIS3
13072 || GET_MODE_SIZE (mode) > 8
13073 || GET_MODE_SIZE (mode) < 4));
13076 /* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.
13078 get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
13079 because the movsi and movsf patterns don't handle r/f moves.
13080 For v8 we copy the default definition. */
13082 static machine_mode
13083 sparc_secondary_memory_needed_mode (machine_mode mode)
13085 if (TARGET_ARCH64)
13087 if (GET_MODE_BITSIZE (mode) < 32)
13088 return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
13089 return mode;
13091 else
13093 if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
13094 return mode_for_size (BITS_PER_WORD,
13095 GET_MODE_CLASS (mode), 0).require ();
13096 return mode;
13100 /* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
13101 OPERANDS[0] in MODE. OPERANDS[1] is the operator of the condition. */
13103 bool
13104 sparc_expand_conditional_move (machine_mode mode, rtx *operands)
13106 enum rtx_code rc = GET_CODE (operands[1]);
13107 machine_mode cmp_mode;
13108 rtx cc_reg, dst, cmp;
13110 cmp = operands[1];
13111 if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
13112 return false;
13114 if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
13115 cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);
13117 cmp_mode = GET_MODE (XEXP (cmp, 0));
13118 rc = GET_CODE (cmp);
13120 dst = operands[0];
13121 if (! rtx_equal_p (operands[2], dst)
13122 && ! rtx_equal_p (operands[3], dst))
13124 if (reg_overlap_mentioned_p (dst, cmp))
13125 dst = gen_reg_rtx (mode);
13127 emit_move_insn (dst, operands[3]);
13129 else if (operands[2] == dst)
13131 operands[2] = operands[3];
13133 if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
13134 rc = reverse_condition_maybe_unordered (rc);
13135 else
13136 rc = reverse_condition (rc);
13139 if (XEXP (cmp, 1) == const0_rtx
13140 && GET_CODE (XEXP (cmp, 0)) == REG
13141 && cmp_mode == DImode
13142 && v9_regcmp_p (rc))
13143 cc_reg = XEXP (cmp, 0);
13144 else
13145 cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));
13147 cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);
13149 emit_insn (gen_rtx_SET (dst,
13150 gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));
13152 if (dst != operands[0])
13153 emit_move_insn (operands[0], dst);
13155 return true;
13158 /* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
13159 into OPERANDS[0] in MODE, depending on the outcome of the comparison of
13160 OPERANDS[4] and OPERANDS[5]. OPERANDS[3] is the operator of the condition.
13161 FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
13162 code to be used for the condition mask. */
13164 void
13165 sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
13167 rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
13168 enum rtx_code code = GET_CODE (operands[3]);
13170 mask = gen_reg_rtx (Pmode);
13171 cop0 = operands[4];
13172 cop1 = operands[5];
13173 if (code == LT || code == GE)
13175 rtx t;
13177 code = swap_condition (code);
13178 t = cop0; cop0 = cop1; cop1 = t;
13181 gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);
13183 fcmp = gen_rtx_UNSPEC (Pmode,
13184 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
13185 fcode);
13187 cmask = gen_rtx_UNSPEC (DImode,
13188 gen_rtvec (2, mask, gsr),
13189 ccode);
13191 bshuf = gen_rtx_UNSPEC (mode,
13192 gen_rtvec (3, operands[1], operands[2], gsr),
13193 UNSPEC_BSHUFFLE);
13195 emit_insn (gen_rtx_SET (mask, fcmp));
13196 emit_insn (gen_rtx_SET (gsr, cmask));
13198 emit_insn (gen_rtx_SET (operands[0], bshuf));
13201 /* On sparc, any mode which naturally allocates into the float
13202 registers should return 4 here. */
13204 unsigned int
13205 sparc_regmode_natural_size (machine_mode mode)
13207 int size = UNITS_PER_WORD;
13209 if (TARGET_ARCH64)
13211 enum mode_class mclass = GET_MODE_CLASS (mode);
13213 if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
13214 size = 4;
13217 return size;
13220 /* Implement TARGET_HARD_REGNO_NREGS.
13222 On SPARC, ordinary registers hold 32 bits worth; this means both
13223 integer and floating point registers. On v9, integer regs hold 64
13224 bits worth; floating point regs hold 32 bits worth (this includes the
13225 new fp regs as even the odd ones are included in the hard register
13226 count). */
13228 static unsigned int
13229 sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
13231 if (regno == SPARC_GSR_REG)
13232 return 1;
13233 if (TARGET_ARCH64)
13235 if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
13236 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13237 return CEIL (GET_MODE_SIZE (mode), 4);
13239 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
13242 /* Implement TARGET_HARD_REGNO_MODE_OK.
13244 ??? Because of the funny way we pass parameters we should allow certain
13245 ??? types of float/complex values to be in integer registers during
13246 ??? RTL generation. This only matters on arch32. */
13248 static bool
13249 sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
13251 return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
13254 /* Implement TARGET_MODES_TIEABLE_P.
13256 For V9 we have to deal with the fact that only the lower 32 floating
13257 point registers are 32-bit addressable. */
13259 static bool
13260 sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
13262 enum mode_class mclass1, mclass2;
13263 unsigned short size1, size2;
13265 if (mode1 == mode2)
13266 return true;
13268 mclass1 = GET_MODE_CLASS (mode1);
13269 mclass2 = GET_MODE_CLASS (mode2);
13270 if (mclass1 != mclass2)
13271 return false;
13273 if (! TARGET_V9)
13274 return true;
13276 /* Classes are the same and we are V9 so we have to deal with upper
13277 vs. lower floating point registers. If one of the modes is a
13278 4-byte mode, and the other is not, we have to mark them as not
13279 tieable because only the lower 32 floating point register are
13280 addressable 32-bits at a time.
13282 We can't just test explicitly for SFmode, otherwise we won't
13283 cover the vector mode cases properly. */
13285 if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
13286 return true;
13288 size1 = GET_MODE_SIZE (mode1);
13289 size2 = GET_MODE_SIZE (mode2);
13290 if ((size1 > 4 && size2 == 4)
13291 || (size2 > 4 && size1 == 4))
13292 return false;
13294 return true;
13297 /* Implement TARGET_CSTORE_MODE. */
13299 static scalar_int_mode
13300 sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
13302 return (TARGET_ARCH64 ? DImode : SImode);
13305 /* Return the compound expression made of T1 and T2. */
13307 static inline tree
13308 compound_expr (tree t1, tree t2)
13310 return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
13313 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
13315 static void
13316 sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
13318 if (!TARGET_FPU)
13319 return;
13321 const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
13322 const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
13324 /* We generate the equivalent of feholdexcept (&fenv_var):
13326 unsigned int fenv_var;
13327 __builtin_store_fsr (&fenv_var);
13329 unsigned int tmp1_var;
13330 tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);
13332 __builtin_load_fsr (&tmp1_var); */
13334 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
13335 TREE_ADDRESSABLE (fenv_var) = 1;
13336 tree fenv_addr = build_fold_addr_expr (fenv_var);
13337 tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
13338 tree hold_stfsr
13339 = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
13340 build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);
13342 tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
13343 TREE_ADDRESSABLE (tmp1_var) = 1;
13344 tree masked_fenv_var
13345 = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
13346 build_int_cst (unsigned_type_node,
13347 ~(accrued_exception_mask | trap_enable_mask)));
13348 tree hold_mask
13349 = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
13350 NULL_TREE, NULL_TREE);
13352 tree tmp1_addr = build_fold_addr_expr (tmp1_var);
13353 tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
13354 tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);
13356 *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);
13358 /* We reload the value of tmp1_var to clear the exceptions:
13360 __builtin_load_fsr (&tmp1_var); */
13362 *clear = build_call_expr (ldfsr, 1, tmp1_addr);
13364 /* We generate the equivalent of feupdateenv (&fenv_var):
13366 unsigned int tmp2_var;
13367 __builtin_store_fsr (&tmp2_var);
13369 __builtin_load_fsr (&fenv_var);
13371 if (SPARC_LOW_FE_EXCEPT_VALUES)
13372 tmp2_var >>= 5;
13373 __atomic_feraiseexcept ((int) tmp2_var); */
13375 tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
13376 TREE_ADDRESSABLE (tmp2_var) = 1;
13377 tree tmp2_addr = build_fold_addr_expr (tmp2_var);
13378 tree update_stfsr
13379 = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
13380 build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);
13382 tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);
13384 tree atomic_feraiseexcept
13385 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
13386 tree update_call
13387 = build_call_expr (atomic_feraiseexcept, 1,
13388 fold_convert (integer_type_node, tmp2_var));
13390 if (SPARC_LOW_FE_EXCEPT_VALUES)
13392 tree shifted_tmp2_var
13393 = build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
13394 build_int_cst (unsigned_type_node, 5));
13395 tree update_shift
13396 = build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
13397 update_call = compound_expr (update_shift, update_call);
13400 *update
13401 = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
13404 #include "gt-sparc.h"