/* Subroutines for insn-output.c for SPARC.
   Copyright (C) 1987-2020 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "opts.h"

/* This file should be included last.  */
#include "target-def.h"
/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* Integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     not variable.  */
  const int int_mul_bit_factor;
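
  /* Illustrative worked example (values taken from the ultrasparc table
     below, which has int_mul = COSTS_N_INSNS (4) and a bit factor of 2):
     a multiply whose rs1 operand has its highest set bit at position 11
     is costed as

       cost = COSTS_N_INSNS (4) + (11 - 3) / 2 = 16 + 4 = 20

     in rtx-cost units, assuming the usual COSTS_N_INSNS (1) == 4.  A bit
     factor of 0 instead means a flat int_mul/int_mulX cost.  */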

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;

  /* cost of a (predictable) branch.  */
  const int branch_cost;
};
static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  4 /* branch cost */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const struct processor_costs *sparc_costs = &cypress_costs;
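
/* sparc_costs is initialized to the cypress table above and later repointed
   by sparc_option_override at the cost table matching the CPU selected for
   tuning with -mtune.  */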
#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   somebody branches between the sethi and jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif
/* Vector to say how input registers are mapped to output registers.
   HARD_FRAME_POINTER_REGNUM cannot be remapped by this function to
   eliminate it.  You must use -fomit-frame-pointer to get that.  */
char leaf_reg_remap[] =
{ 0, 1, 2, 3, 4, 5, 6, 7,
  -1, -1, -1, -1, -1, -1, 14, -1,
  -1, -1, -1, -1, -1, -1, -1, -1,
  8, 9, 10, 11, 12, 13, -1, 15,

  32, 33, 34, 35, 36, 37, 38, 39,
  40, 41, 42, 43, 44, 45, 46, 47,
  48, 49, 50, 51, 52, 53, 54, 55,
  56, 57, 58, 59, 60, 61, 62, 63,
  64, 65, 66, 67, 68, 69, 70, 71,
  72, 73, 74, 75, 76, 77, 78, 79,
  80, 81, 82, 83, 84, 85, 86, 87,
  88, 89, 90, 91, 92, 93, 94, 95,
  96, 97, 98, 99, 100, 101, 102};
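
/* Reading the vector above: in a leaf function the register window is never
   shifted, so the incoming arguments already sit in the caller's %o
   registers.  Accordingly %i0-%i5 (hard regs 24-29) are remapped to %o0-%o5
   (hard regs 8-13) and %i7 (31) to %o7 (15), %sp (14) maps to itself, and a
   register mapped to -1 may not be used at all by a function that is to
   receive the leaf treatment (cf. sparc_leaf_regs below).  */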
/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};
struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;
static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     const function_arg_info &);
static void sparc_function_arg_advance (cumulative_args_t,
					const function_arg_info &);
static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					const function_arg_info &);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    const function_arg_info &);
static bool sparc_return_in_memory (const_tree, const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
					    const vec_perm_indices &);
static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
static const struct attribute_spec sparc_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE,
  { NULL, 0, 0, false, false, false, false, NULL, NULL }
};
#endif

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LRA_P
#define TARGET_LRA_P sparc_lra_p

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff
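
/* The SPARC ldstub instruction atomically loads the addressed byte and
   stores all-ones into it, so the value left in memory by a successful
   __atomic_test_and_set is 0xff rather than the default 1; the hook above
   tells the middle-end what value a set flag holds.  */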

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const

#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}

/* True if any of INSN's source register(s) is REG.  */

static bool
insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
{
  extract_insn (insn);
  return ((REG_P (recog_data.operand[1])
           && REGNO (recog_data.operand[1]) == reg)
          || (recog_data.n_operands == 3
              && REG_P (recog_data.operand[2])
              && REGNO (recog_data.operand[2]) == reg));
}

/* True if INSN is a floating-point division or square-root.  */

static bool
div_sqrt_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a floating-point instruction.  */

static bool
fpop_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPMOVE:
    case TYPE_FPCMOVE:
    case TYPE_FP:
    case TYPE_FPCMP:
    case TYPE_FPMUL:
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is an atomic instruction.  */

static bool
atomic_insn_for_leon3_p (rtx_insn *insn)
{
  switch (INSN_CODE (insn))
    {
    case CODE_FOR_swapsi:
    case CODE_FOR_ldstub:
    case CODE_FOR_atomic_compare_and_swap_leon3_1:
      return true;
    default:
      return false;
    }
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN)			\
  (NONDEBUG_INSN_P (INSN)			\
   && GET_CODE (PATTERN (INSN)) != USE		\
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
          && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
        {
          jump = seq->insn (0);
          insn = seq->insn (1);
        }
      else if (JUMP_P (insn))
        jump = insn;
      else
        jump = NULL;

      /* Place a NOP at the branch target of an integer branch if it is a
         floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
          && jump
          && jump_to_label_p (jump)
          && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
        {
          rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
          if (target
              && (fpop_insn_p (target)
                  || (JUMP_P (target)
                      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
            emit_insn_before (gen_nop (), target);
        }

      /* Insert a NOP between load instruction and atomic instruction.  Insert
         a NOP at branch target if there is a load in delay slot and an atomic
         instruction at branch target.  */
      if (sparc_fix_ut700
          && NONJUMP_INSN_P (insn)
          && (set = single_set (insn)) != NULL_RTX
          && mem_ref (SET_SRC (set))
          && REG_P (SET_DEST (set)))
        {
          if (jump && jump_to_label_p (jump))
            {
              rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
              if (target && atomic_insn_for_leon3_p (target))
                emit_insn_before (gen_nop (), target);
            }

          next = next_active_insn (insn);
          if (!next)
            break;

          if (atomic_insn_for_leon3_p (next))
            insert_nop = true;
        }

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
         ends with another fdiv or fsqrt instruction with no dependencies on
         the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
          && NONJUMP_INSN_P (insn)
          && div_sqrt_insn_p (insn))
        {
          int i;
          int fp_found = 0;
          rtx_insn *after;

          const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

          next = next_active_insn (insn);
          if (!next)
            break;

          for (after = next, i = 0; i < 4; i++)
            {
              /* Count floating-point operations.  */
              if (i != 3 && fpop_insn_p (after))
                {
                  /* If the insn uses the destination register of
                     the div/sqrt, then it cannot be problematic.  */
                  if (insn_uses_reg_p (after, dest_reg))
                    break;
                  fp_found++;
                }

              /* Count floating-point loads.  */
              if (i != 3
                  && (set = single_set (after)) != NULL_RTX
                  && REG_P (SET_DEST (set))
                  && REGNO (SET_DEST (set)) > 31)
                {
                  /* If the insn uses the destination register of
                     the div/sqrt, then it cannot be problematic.  */
                  if (REGNO (SET_DEST (set)) == dest_reg)
                    break;
                  fp_found++;
                }

              /* Check if this is a problematic sequence.  */
              if (i > 1
                  && fp_found >= 2
                  && div_sqrt_insn_p (after))
                {
                  /* If this is the short version of the problematic
                     sequence we add two NOPs in a row to also prevent
                     the long version.  */
                  if (i == 2)
                    emit_insn_before (gen_nop (), next);
                  insert_nop = true;
                  break;
                }

              /* No need to scan past a second div/sqrt.  */
              if (div_sqrt_insn_p (after))
                break;

              /* Insert NOP before branch.  */
              if (i < 3
                  && (!NONJUMP_INSN_P (after)
                      || GET_CODE (PATTERN (after)) == SEQUENCE))
                {
                  insert_nop = true;
                  break;
                }

              after = next_active_insn (after);
              if (!after)
                break;
            }
        }

      /* Look for either of these two sequences:

         Sequence A:
         1. store of word size or less (e.g. st / stb / sth / stf)
         2. any single instruction that is not a load or store
         3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

         Sequence B:
         1. store of double word size (e.g. std / stdf)
         2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
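      /* For illustration (the register and address choices here are
         hypothetical, not taken from real output), the fix for sequence A
         turns

             st  %g1, [%fp-4]
             add %g2, %g3, %g2
             st  %g2, [%fp-8]

         into

             st  %g1, [%fp-4]
             nop
             add %g2, %g3, %g2
             st  %g2, [%fp-8]

         because the NOP is emitted before NEXT, i.e. right after the first
         store (see the final emit_insn_before at the bottom of the loop).  */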
      if (sparc_fix_b2bst
          && NONJUMP_INSN_P (insn)
          && (set = single_set (insn)) != NULL_RTX
          && MEM_P (SET_DEST (set)))
        {
          /* Sequence B begins with a double-word store.  */
          bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
          rtx_insn *after;
          int i;

          next = next_active_insn (insn);
          if (!next)
            break;

          for (after = next, i = 0; i < 2; i++)
            {
              /* Skip empty assembly statements.  */
              if ((GET_CODE (PATTERN (after)) == UNSPEC_VOLATILE)
                  || (USEFUL_INSN_P (after)
                      && (asm_noperands (PATTERN (after)) >= 0)
                      && !strcmp (decode_asm_operands (PATTERN (after),
                                                       NULL, NULL, NULL,
                                                       NULL, NULL), "")))
                after = next_active_insn (after);
              if (!after)
                break;

              /* If the insn is a branch, then it cannot be problematic.  */
              if (!NONJUMP_INSN_P (after)
                  || GET_CODE (PATTERN (after)) == SEQUENCE)
                break;

              /* Sequence B is only two instructions long.  */
              if (seq_b)
                {
                  /* Add NOP if followed by a store.  */
                  if ((set = single_set (after)) != NULL_RTX
                      && MEM_P (SET_DEST (set)))
                    insert_nop = true;

                  /* Otherwise it is ok.  */
                  break;
                }

              /* If the second instruction is a load or a store,
                 then the sequence cannot be problematic.  */
              if (i == 0)
                {
                  if ((set = single_set (after)) != NULL_RTX
                      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
                    break;

                  after = next_active_insn (after);
                  if (!after)
                    break;
                }

              /* Add NOP if third instruction is a store.  */
              if (i == 1
                  && (set = single_set (after)) != NULL_RTX
                  && MEM_P (SET_DEST (set)))
                insert_nop = true;
            }
        }

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
               && NONJUMP_INSN_P (insn)
               && (set = single_set (insn)) != NULL_RTX
               && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
               && mem_ref (SET_SRC (set))
               && REG_P (SET_DEST (set))
               && REGNO (SET_DEST (set)) > 31
               && REGNO (SET_DEST (set)) % 2 != 0)
        {
          /* The wrong dependency is on the enclosing double register.  */
          const unsigned int x = REGNO (SET_DEST (set)) - 1;
          unsigned int src1, src2, dest;
          int code;

          next = next_active_insn (insn);
          if (!next)
            break;
          /* If the insn is a branch, then it cannot be problematic.  */
          if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
            continue;

          extract_insn (next);
          code = INSN_CODE (next);

          switch (code)
            {
            case CODE_FOR_adddf3:
            case CODE_FOR_subdf3:
            case CODE_FOR_muldf3:
            case CODE_FOR_divdf3:
              dest = REGNO (recog_data.operand[0]);
              src1 = REGNO (recog_data.operand[1]);
              src2 = REGNO (recog_data.operand[2]);
              if (src1 != src2)
                {
                  /* Case [1-4]:
                       ld [address], %fx+1
                       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
                  if ((src1 == x || src2 == x)
                      && (dest == src1 || dest == src2))
                    insert_nop = true;
                }
              else
                {
                  /* Case 5:
                       ld [address], %fx+1
                       FPOPd %fx, %fx, %fx  */
                  if (src1 == x
                      && dest == src1
                      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
                    insert_nop = true;
                }
              break;

            case CODE_FOR_sqrtdf2:
              dest = REGNO (recog_data.operand[0]);
              src1 = REGNO (recog_data.operand[1]);
              /* Case 6:
                   ld [address], %fx+1
                   fsqrtd %fx, %fx  */
              if (src1 == x && dest == src1)
                insert_nop = true;
              break;

            default:
              break;
            }
        }

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
               && NONJUMP_INSN_P (insn)
               && (set = single_set (insn)) != NULL_RTX
               && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
               && (mem_ref (SET_SRC (set)) != NULL_RTX
                   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
               && REG_P (SET_DEST (set))
               && REGNO (SET_DEST (set)) < 32)
        {
          /* There is no problem if the second memory access has a data
             dependency on the first single-cycle load.  */
          rtx x = SET_DEST (set);

          next = next_active_insn (insn);
          if (!next)
            break;
          /* If the insn is a branch, then it cannot be problematic.  */
          if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
            continue;

          /* Look for a second memory access to/from an integer register.  */
          if ((set = single_set (next)) != NULL_RTX)
            {
              rtx src = SET_SRC (set);
              rtx dest = SET_DEST (set);
              rtx mem;

              /* LDD is affected.  */
              if ((mem = mem_ref (src)) != NULL_RTX
                  && REG_P (dest)
                  && REGNO (dest) < 32
                  && !reg_mentioned_p (x, XEXP (mem, 0)))
                insert_nop = true;

              /* STD is *not* affected.  */
              else if (MEM_P (dest)
                       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
                       && (src == CONST0_RTX (GET_MODE (dest))
                           || (REG_P (src)
                               && REGNO (src) < 32
                               && REGNO (src) != REGNO (x)))
                       && !reg_mentioned_p (x, XEXP (dest, 0)))
                insert_nop = true;

              /* GOT accesses use LD.  */
              else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
                       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
                insert_nop = true;
            }
        }

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
               && NONJUMP_INSN_P (insn)
               && (set = single_set (insn)) != NULL_RTX
               && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
               && REG_P (SET_DEST (set))
               && REGNO (SET_DEST (set)) > 31)
        {
          /* Number of instructions in the problematic window.  */
          const int n_insns = 4;
          /* The problematic combination is with the sibling FP register.  */
          const unsigned int x = REGNO (SET_DEST (set));
          const unsigned int y = x ^ 1;
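          /* E.g. a load into %f1 (hard reg 33) has sibling %f0 (hard reg 32):
             XORing the regno with 1 names the other half of the same
             double-precision register pair.  */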
          rtx_insn *after;
          int i;

          next = next_active_insn (insn);
          if (!next)
            break;
          /* If the insn is a branch, then it cannot be problematic.  */
          if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
            continue;

          /* Look for a second load/operation into the sibling FP register.  */
          if (!((set = single_set (next)) != NULL_RTX
                && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
                && REG_P (SET_DEST (set))
                && REGNO (SET_DEST (set)) == y))
            continue;

          /* Look for a (possible) store from the FP register in the next N
             instructions, but bail out if it is again modified or if there
             is a store from the sibling FP register before this store.  */
          for (after = next, i = 0; i < n_insns; i++)
            {
              bool branch_p;

              after = next_active_insn (after);
              if (!after)
                break;

              /* This is a branch with an empty delay slot.  */
              if (!NONJUMP_INSN_P (after))
                {
                  if (++i == n_insns)
                    break;
                  branch_p = true;
                  after = NULL;
                }

              /* This is a branch with a filled delay slot.  */
              else if (rtx_sequence *seq =
                         dyn_cast <rtx_sequence *> (PATTERN (after)))
                {
                  if (++i == n_insns)
                    break;
                  branch_p = true;
                  after = seq->insn (1);
                }

              /* This is a regular instruction.  */
              else
                branch_p = false;

              if (after && (set = single_set (after)) != NULL_RTX)
                {
                  const rtx src = SET_SRC (set);
                  const rtx dest = SET_DEST (set);
                  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

                  /* If the FP register is again modified before the store,
                     then the store isn't affected.  */
                  if (REG_P (dest)
                      && (REGNO (dest) == x
                          || (REGNO (dest) == y && size == 8)))
                    break;

                  if (MEM_P (dest) && REG_P (src))
                    {
                      /* If there is a store from the sibling FP register
                         before the store, then the store is not affected.  */
                      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
                        break;

                      /* Otherwise, the store is affected.  */
                      if (REGNO (src) == x && size == 4)
                        {
                          insert_nop = true;
                          break;
                        }
                    }
                }

              /* If we have a branch in the first M instructions, then we
                 cannot see the (M+2)th instruction so we play safe.  */
              if (branch_p && i <= (n_insns - 2))
                {
                  insert_nop = true;
                  break;
                }
            }
        }

      else
        next = NEXT_INSN (insn);

      if (insert_nop)
        emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
  {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f
             || sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
             || sparc_fix_b2bst || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }
}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FSMULD)
    fprintf (stderr, "FSMULD ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_VIS4B)
    fprintf (stderr, "VIS4B ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum sparc_processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA, MASK_LEON3 },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is a more recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
  };
  const struct cpu_table *cpu;
  unsigned int i;

  if (sparc_debug_string != NULL)
    {
      const char *q;
      char *p;

      p = ASTRDUP (sparc_debug_string);
      while ((q = strtok (p, ",")) != NULL)
        {
          bool invert;
          int mask;

          p = NULL;
          if (*q == '!')
            {
              invert = true;
              q++;
            }
          else
            invert = false;

          if (! strcmp (q, "all"))
            mask = MASK_DEBUG_ALL;
          else if (! strcmp (q, "options"))
            mask = MASK_DEBUG_OPTIONS;
          else
            error ("unknown %<-mdebug-%s%> switch", q);

          if (invert)
            sparc_debug &= ~mask;
          else
            sparc_debug |= mask;
        }
    }
1745 /* Enable the FsMULd instruction by default if not explicitly specified by
1746 the user. It may be later disabled by the CPU (explicitly or not). */
1747 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1748 target_flags |= MASK_FSMULD;
1750 if (TARGET_DEBUG_OPTIONS)
1752 dump_target_flags("Initial target_flags", target_flags);
1753 dump_target_flags("target_flags_explicit", target_flags_explicit);
1756 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1757 SUBTARGET_OVERRIDE_OPTIONS;
1758 #endif
1760 #ifndef SPARC_BI_ARCH
1761 /* Check for unsupported architecture size. */
1762 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1763 error ("%s is not supported by this configuration",
1764 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1765 #endif
1767 /* We force all 64-bit archs to use a 128-bit long double. */
1768 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1770 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1771 target_flags |= MASK_LONG_DOUBLE_128;
1774 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1775 for (i = 8; i < 16; i++)
1776 if (!call_used_regs [i])
1778 error ("%<-fcall-saved-REG%> is not supported for out registers");
1779 call_used_regs [i] = 1;
1782 /* Set the default CPU if no -mcpu option was specified. */
1783 if (!global_options_set.x_sparc_cpu_and_features)
1785 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1786 if (def->cpu == TARGET_CPU_DEFAULT)
1787 break;
1788 gcc_assert (def->cpu != -1);
1789 sparc_cpu_and_features = def->processor;
1792 /* Set the default CPU if no -mtune option was specified. */
1793 if (!global_options_set.x_sparc_cpu)
1794 sparc_cpu = sparc_cpu_and_features;
1796 cpu = &cpu_table[(int) sparc_cpu_and_features];
1798 if (TARGET_DEBUG_OPTIONS)
1800 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1801 dump_target_flags ("cpu->disable", cpu->disable);
1802 dump_target_flags ("cpu->enable", cpu->enable);
1805 target_flags &= ~cpu->disable;
1806 target_flags |= (cpu->enable
1807 #ifndef HAVE_AS_FMAF_HPC_VIS3
1808 & ~(MASK_FMAF | MASK_VIS3)
1809 #endif
1810 #ifndef HAVE_AS_SPARC4
1811 & ~MASK_CBCOND
1812 #endif
1813 #ifndef HAVE_AS_SPARC5_VIS4
1814 & ~(MASK_VIS4 | MASK_SUBXC)
1815 #endif
1816 #ifndef HAVE_AS_SPARC6
1817 & ~(MASK_VIS4B)
1818 #endif
1819 #ifndef HAVE_AS_LEON
1820 & ~(MASK_LEON | MASK_LEON3)
1821 #endif
1822 & ~(target_flags_explicit & MASK_FEATURES)
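 /* Example (illustrative): with a hypothetical "-mcpu=niagara4 -mno-popc"
    command line, MASK_POPC is recorded in target_flags_explicit, so the
    "& ~(target_flags_explicit & MASK_FEATURES)" term above prevents the
    niagara4 entry from re-enabling POPC behind the user's back. */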
1825 /* FsMULd is a V8 instruction. */
1826 if (!TARGET_V8 && !TARGET_V9)
1827 target_flags &= ~MASK_FSMULD;
1829 /* -mvis2 implies -mvis. */
1830 if (TARGET_VIS2)
1831 target_flags |= MASK_VIS;
1833 /* -mvis3 implies -mvis2 and -mvis. */
1834 if (TARGET_VIS3)
1835 target_flags |= MASK_VIS2 | MASK_VIS;
1837 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1838 if (TARGET_VIS4)
1839 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1841 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis */
1842 if (TARGET_VIS4B)
1843 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
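 /* Example (illustrative): because the implications above cascade, a
    command line containing only -mvis4b ends up with MASK_VIS4B,
    MASK_VIS4, MASK_VIS3, MASK_VIS2 and MASK_VIS all set in target_flags. */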
1845 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1846 FPU is disabled. */
1847 if (!TARGET_FPU)
1848 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1849 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1851 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1852 are available; -m64 also implies v9. */
1853 if (TARGET_VIS || TARGET_ARCH64)
1855 target_flags |= MASK_V9;
1856 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1859 /* -mvis also implies -mv8plus on 32-bit. */
1860 if (TARGET_VIS && !TARGET_ARCH64)
1861 target_flags |= MASK_V8PLUS;
1863 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1864 if (TARGET_V9 && TARGET_ARCH32)
1865 target_flags |= MASK_DEPRECATED_V8_INSNS;
1867 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1868 if (!TARGET_V9 || TARGET_ARCH64)
1869 target_flags &= ~MASK_V8PLUS;
1871 /* Don't use stack biasing in 32-bit mode. */
1872 if (TARGET_ARCH32)
1873 target_flags &= ~MASK_STACK_BIAS;
1875 /* Use LRA instead of reload, unless otherwise instructed. */
1876 if (!(target_flags_explicit & MASK_LRA))
1877 target_flags |= MASK_LRA;
1879 /* Enable applicable errata workarounds for LEON3FT. */
1880 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1882 sparc_fix_b2bst = 1;
1883 sparc_fix_lost_divsqrt = 1;
1886 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1887 if (sparc_fix_ut699)
1888 target_flags &= ~MASK_FSMULD;
1890 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1891 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1892 target_flags |= MASK_LONG_DOUBLE_128;
1893 #endif
1895 if (TARGET_DEBUG_OPTIONS)
1896 dump_target_flags ("Final target_flags", target_flags);
1898 /* Set the code model if no -mcmodel option was specified. */
1899 if (global_options_set.x_sparc_code_model)
1901 if (TARGET_ARCH32)
1902 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1904 else
1906 if (TARGET_ARCH32)
1907 sparc_code_model = CM_32;
1908 else
1909 sparc_code_model = SPARC_DEFAULT_CMODEL;
1912 /* Set the memory model if no -mmemory-model option was specified. */
1913 if (!global_options_set.x_sparc_memory_model)
1915 /* Choose the memory model for the operating system. */
1916 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1917 if (os_default != SMM_DEFAULT)
1918 sparc_memory_model = os_default;
1919 /* Choose the most relaxed model for the processor. */
1920 else if (TARGET_V9)
1921 sparc_memory_model = SMM_RMO;
1922 else if (TARGET_LEON3)
1923 sparc_memory_model = SMM_TSO;
1924 else if (TARGET_LEON)
1925 sparc_memory_model = SMM_SC;
1926 else if (TARGET_V8)
1927 sparc_memory_model = SMM_PSO;
1928 else
1929 sparc_memory_model = SMM_SC;
1932 /* Supply a default value for align_functions. */
1933 if (flag_align_functions && !str_align_functions)
1935 if (sparc_cpu == PROCESSOR_ULTRASPARC
1936 || sparc_cpu == PROCESSOR_ULTRASPARC3
1937 || sparc_cpu == PROCESSOR_NIAGARA
1938 || sparc_cpu == PROCESSOR_NIAGARA2
1939 || sparc_cpu == PROCESSOR_NIAGARA3
1940 || sparc_cpu == PROCESSOR_NIAGARA4)
1941 str_align_functions = "32";
1942 else if (sparc_cpu == PROCESSOR_NIAGARA7
1943 || sparc_cpu == PROCESSOR_M8)
1944 str_align_functions = "64";
1947 /* Validate PCC_STRUCT_RETURN. */
1948 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
1949 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
1951 /* Only use .uaxword when compiling for a 64-bit target. */
1952 if (!TARGET_ARCH64)
1953 targetm.asm_out.unaligned_op.di = NULL;
1955 /* Set the processor costs. */
1956 switch (sparc_cpu)
1958 case PROCESSOR_V7:
1959 case PROCESSOR_CYPRESS:
1960 sparc_costs = &cypress_costs;
1961 break;
1962 case PROCESSOR_V8:
1963 case PROCESSOR_SPARCLITE:
1964 case PROCESSOR_SUPERSPARC:
1965 sparc_costs = &supersparc_costs;
1966 break;
1967 case PROCESSOR_F930:
1968 case PROCESSOR_F934:
1969 case PROCESSOR_HYPERSPARC:
1970 case PROCESSOR_SPARCLITE86X:
1971 sparc_costs = &hypersparc_costs;
1972 break;
1973 case PROCESSOR_LEON:
1974 sparc_costs = &leon_costs;
1975 break;
1976 case PROCESSOR_LEON3:
1977 case PROCESSOR_LEON3V7:
1978 sparc_costs = &leon3_costs;
1979 break;
1980 case PROCESSOR_SPARCLET:
1981 case PROCESSOR_TSC701:
1982 sparc_costs = &sparclet_costs;
1983 break;
1984 case PROCESSOR_V9:
1985 case PROCESSOR_ULTRASPARC:
1986 sparc_costs = &ultrasparc_costs;
1987 break;
1988 case PROCESSOR_ULTRASPARC3:
1989 sparc_costs = &ultrasparc3_costs;
1990 break;
1991 case PROCESSOR_NIAGARA:
1992 sparc_costs = &niagara_costs;
1993 break;
1994 case PROCESSOR_NIAGARA2:
1995 sparc_costs = &niagara2_costs;
1996 break;
1997 case PROCESSOR_NIAGARA3:
1998 sparc_costs = &niagara3_costs;
1999 break;
2000 case PROCESSOR_NIAGARA4:
2001 sparc_costs = &niagara4_costs;
2002 break;
2003 case PROCESSOR_NIAGARA7:
2004 sparc_costs = &niagara7_costs;
2005 break;
2006 case PROCESSOR_M8:
2007 sparc_costs = &m8_costs;
2008 break;
2009 case PROCESSOR_NATIVE:
2010 gcc_unreachable ();
2013 /* param_simultaneous_prefetches is the number of prefetches that
2014 can run at the same time. More important, it is the threshold
2015 defining when additional prefetches will be dropped by the
2016 hardware.
2018 The UltraSPARC-III features a documented prefetch queue with a
2019 size of 8. Additional prefetches issued in the cpu are
2020 dropped.
2022 Niagara processors are different. In these processors prefetches
2023 are handled much like regular loads. The L1 miss buffer is 32
2024 entries, but prefetches start getting affected when 30 entries
2025 become occupied. That occupation could be a mix of regular loads
2026 and prefetches though. And that buffer is shared by all threads.
2027 Once the threshold is reached, if the core is running a single
2028 thread the prefetch will retry. If more than one thread is
2029 running, the prefetch will be dropped.
2031 All this makes it very difficult to determine how many
2032 prefetches can be issued simultaneously, even in a
2033 single-threaded program. Experimental results show that setting
2034 this parameter to 32 works well when the number of threads is not
2035 high. */
2036 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2037 param_simultaneous_prefetches,
2038 ((sparc_cpu == PROCESSOR_ULTRASPARC
2039 || sparc_cpu == PROCESSOR_NIAGARA
2040 || sparc_cpu == PROCESSOR_NIAGARA2
2041 || sparc_cpu == PROCESSOR_NIAGARA3
2042 || sparc_cpu == PROCESSOR_NIAGARA4)
2043 ? 2
2044 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2045 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2046 || sparc_cpu == PROCESSOR_M8)
2047 ? 32 : 3))));
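 /* Summary of the mapping above (illustrative):

	UltraSPARC-I/II, Niagara-1/2/3/4  -> 2
	UltraSPARC-III                    -> 8
	Niagara-7 (M7), M8                -> 32
	everything else                   -> 3  */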
2049 /* param_l1_cache_line_size is the size of the L1 cache line, in
2050 bytes.
2052 The Oracle SPARC Architecture (previously the UltraSPARC
2053 Architecture) specification states that when a PREFETCH[A]
2054 instruction is executed an implementation-specific amount of data
2055 is prefetched, and that it is at least 64 bytes long (aligned to
2056 at least 64 bytes).
2058 However, this is not correct. The M7 (and implementations prior
2059 to that) does not guarantee a 64B prefetch into a cache if the
2060 line size is smaller. A single cache line is all that is ever
2061 prefetched. So for the M7, where the L1D$ has 32B lines and the
2062 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2063 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2064 is a read_n prefetch, which is the only type which allocates to
2065 the L1.) */
2066 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2067 param_l1_cache_line_size,
2068 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2070 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
2071 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2072 Niagara processors feature a L1D$ of 16KB. */
2073 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2074 param_l1_cache_size,
2075 ((sparc_cpu == PROCESSOR_ULTRASPARC
2076 || sparc_cpu == PROCESSOR_ULTRASPARC3
2077 || sparc_cpu == PROCESSOR_NIAGARA
2078 || sparc_cpu == PROCESSOR_NIAGARA2
2079 || sparc_cpu == PROCESSOR_NIAGARA3
2080 || sparc_cpu == PROCESSOR_NIAGARA4
2081 || sparc_cpu == PROCESSOR_NIAGARA7
2082 || sparc_cpu == PROCESSOR_M8)
2083 ? 16 : 64));
2085 /* param_l2_cache_size is the size of the L2 in kilobytes. Note
2086 that 512 is the default in params.def. */
2087 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2088 param_l2_cache_size,
2089 ((sparc_cpu == PROCESSOR_NIAGARA4
2090 || sparc_cpu == PROCESSOR_M8)
2091 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2092 ? 256 : 512)));
2095 /* Disable save slot sharing for call-clobbered registers by default.
2096 The IRA sharing algorithm works on single registers only and this
2097 pessimizes for double floating-point registers. */
2098 if (!global_options_set.x_flag_ira_share_save_slots)
2099 flag_ira_share_save_slots = 0;
2101 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2102 redundant 32-to-64-bit extensions. */
2103 if (!global_options_set.x_flag_ree && TARGET_ARCH32)
2104 flag_ree = 0;
2106 /* Do various machine dependent initializations. */
2107 sparc_init_modes ();
2109 /* Set up function hooks. */
2110 init_machine_status = sparc_init_machine_status;
2113 /* Miscellaneous utilities. */
2115 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2116 or branch on register contents instructions. */
2119 v9_regcmp_p (enum rtx_code code)
2121 return (code == EQ || code == NE || code == GE || code == LT
2122 || code == LE || code == GT);
2125 /* Nonzero if OP is a floating point constant which can
2126 be loaded into an integer register using a single
2127 sethi instruction. */
2130 fp_sethi_p (rtx op)
2132 if (GET_CODE (op) == CONST_DOUBLE)
2134 long i;
2136 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2137 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2140 return 0;
2143 /* Nonzero if OP is a floating point constant which can
2144 be loaded into an integer register using a single
2145 mov instruction. */
2148 fp_mov_p (rtx op)
2150 if (GET_CODE (op) == CONST_DOUBLE)
2152 long i;
2154 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2155 return SPARC_SIMM13_P (i);
2158 return 0;
2161 /* Nonzero if OP is a floating point constant which can
2162 be loaded into an integer register using a high/losum
2163 instruction sequence. */
2166 fp_high_losum_p (rtx op)
2168 /* The constraints calling this should only be in
2169 SFmode move insns, so any constant which cannot
2170 be moved using a single insn will do. */
2171 if (GET_CODE (op) == CONST_DOUBLE)
2173 long i;
2175 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2176 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2179 return 0;
2182 /* Return true if the address of LABEL can be loaded by means of the
2183 mov{si,di}_pic_label_ref patterns in PIC mode. */
2185 static bool
2186 can_use_mov_pic_label_ref (rtx label)
2188 /* VxWorks does not impose a fixed gap between segments; the run-time
2189 gap can be different from the object-file gap. We therefore can't
2190 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2191 are absolutely sure that X is in the same segment as the GOT.
2192 Unfortunately, the flexibility of linker scripts means that we
2193 can't be sure of that in general, so assume that GOT-relative
2194 accesses are never valid on VxWorks. */
2195 if (TARGET_VXWORKS_RTP)
2196 return false;
2198 /* Similarly, if the label is non-local, it might end up being placed
2199 in a different section than the current one; now mov_pic_label_ref
2200 requires the label and the code to be in the same section. */
2201 if (LABEL_REF_NONLOCAL_P (label))
2202 return false;
2204 /* Finally, if we are reordering basic blocks and partition into hot
2205 and cold sections, this might happen for any label. */
2206 if (flag_reorder_blocks_and_partition)
2207 return false;
2209 return true;
2212 /* Expand a move instruction. Return true if all work is done. */
2214 bool
2215 sparc_expand_move (machine_mode mode, rtx *operands)
2217 /* Handle sets of MEM first. */
2218 if (GET_CODE (operands[0]) == MEM)
2220 /* 0 is a register (or a pair of registers) on SPARC. */
2221 if (register_or_zero_operand (operands[1], mode))
2222 return false;
2224 if (!reload_in_progress)
2226 operands[0] = validize_mem (operands[0]);
2227 operands[1] = force_reg (mode, operands[1]);
2231 /* Fix up TLS cases. */
2232 if (TARGET_HAVE_TLS
2233 && CONSTANT_P (operands[1])
2234 && sparc_tls_referenced_p (operands [1]))
2236 operands[1] = sparc_legitimize_tls_address (operands[1]);
2237 return false;
2240 /* Fix up PIC cases. */
2241 if (flag_pic && CONSTANT_P (operands[1]))
2243 if (pic_address_needs_scratch (operands[1]))
2244 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2246 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2247 if ((GET_CODE (operands[1]) == LABEL_REF
2248 && can_use_mov_pic_label_ref (operands[1]))
2249 || (GET_CODE (operands[1]) == CONST
2250 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2251 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2252 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2253 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2255 if (mode == SImode)
2257 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2258 return true;
2261 if (mode == DImode)
2263 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2264 return true;
2268 if (symbolic_operand (operands[1], mode))
2270 operands[1]
2271 = sparc_legitimize_pic_address (operands[1],
2272 reload_in_progress
2273 ? operands[0] : NULL_RTX);
2274 return false;
2278 /* If we are trying to toss an integer constant into FP registers,
2279 or loading an FP or vector constant, force it into memory. */
2280 if (CONSTANT_P (operands[1])
2281 && REG_P (operands[0])
2282 && (SPARC_FP_REG_P (REGNO (operands[0]))
2283 || SCALAR_FLOAT_MODE_P (mode)
2284 || VECTOR_MODE_P (mode)))
2286 /* emit_group_store will send such bogosity to us when it is
2287 not storing directly into memory. So fix this up to avoid
2288 crashes in output_constant_pool. */
2289 if (operands [1] == const0_rtx)
2290 operands[1] = CONST0_RTX (mode);
2292 /* We can clear or set to all-ones FP registers if TARGET_VIS, and
2293 can always do so for the other registers. */
2294 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2295 && (const_zero_operand (operands[1], mode)
2296 || const_all_ones_operand (operands[1], mode)))
2297 return false;
2299 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2300 /* We are able to build any SF constant in integer registers
2301 with at most 2 instructions. */
2302 && (mode == SFmode
2303 /* And any DF constant in integer registers if needed. */
2304 || (mode == DFmode && !can_create_pseudo_p ())))
2305 return false;
2307 operands[1] = force_const_mem (mode, operands[1]);
2308 if (!reload_in_progress)
2309 operands[1] = validize_mem (operands[1]);
2310 return false;
2313 /* Accept non-constants and valid constants unmodified. */
2314 if (!CONSTANT_P (operands[1])
2315 || GET_CODE (operands[1]) == HIGH
2316 || input_operand (operands[1], mode))
2317 return false;
2319 switch (mode)
2321 case E_QImode:
2322 /* All QImode constants require only one insn, so proceed. */
2323 break;
2325 case E_HImode:
2326 case E_SImode:
2327 sparc_emit_set_const32 (operands[0], operands[1]);
2328 return true;
2330 case E_DImode:
2331 /* input_operand should have filtered out 32-bit mode. */
2332 sparc_emit_set_const64 (operands[0], operands[1]);
2333 return true;
2335 case E_TImode:
2337 rtx high, low;
2338 /* TImode isn't available in 32-bit mode. */
2339 split_double (operands[1], &high, &low);
2340 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2341 high));
2342 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2343 low));
2345 return true;
2347 default:
2348 gcc_unreachable ();
2351 return false;
2354 /* Load OP1, a 32-bit constant, into OP0, a register.
2355 We know it can't be done in one insn when we get
2356 here, the move expander guarantees this. */
2358 static void
2359 sparc_emit_set_const32 (rtx op0, rtx op1)
2361 machine_mode mode = GET_MODE (op0);
2362 rtx temp = op0;
2364 if (can_create_pseudo_p ())
2365 temp = gen_reg_rtx (mode);
2367 if (GET_CODE (op1) == CONST_INT)
2369 gcc_assert (!small_int_operand (op1, mode)
2370 && !const_high_operand (op1, mode));
2372 /* Emit them as real moves instead of a HIGH/LO_SUM,
2373 this way CSE can see everything and reuse intermediate
2374 values if it wants. */
2375 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2376 & ~(HOST_WIDE_INT) 0x3ff)));
2378 emit_insn (gen_rtx_SET (op0,
2379 gen_rtx_IOR (mode, temp,
2380 GEN_INT (INTVAL (op1) & 0x3ff))));
2382 else
2384 /* A symbol, emit in the traditional way. */
2385 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2386 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
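 /* Worked example (illustrative): for op1 == (const_int 0x12345678) the
    CONST_INT path above emits the equivalent of

	sethi	%hi(0x12345400), %temp	! 0x12345678 & ~0x3ff
	or	%temp, 0x278, %op0	! 0x12345678 & 0x3ff

    i.e. a 22-bit high part followed by a 10-bit low part. */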
2390 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2391 If TEMP is nonzero, we are forbidden to use any other scratch
2392 registers. Otherwise, we are allowed to generate them as needed.
2394 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2395 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2397 void
2398 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2400 rtx cst, temp1, temp2, temp3, temp4, temp5;
2401 rtx ti_temp = 0;
2403 /* Deal with too large offsets. */
2404 if (GET_CODE (op1) == CONST
2405 && GET_CODE (XEXP (op1, 0)) == PLUS
2406 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2407 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2409 gcc_assert (!temp);
2410 temp1 = gen_reg_rtx (DImode);
2411 temp2 = gen_reg_rtx (DImode);
2412 sparc_emit_set_const64 (temp2, cst);
2413 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2414 NULL_RTX);
2415 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2416 return;
2419 if (temp && GET_MODE (temp) == TImode)
2421 ti_temp = temp;
2422 temp = gen_rtx_REG (DImode, REGNO (temp));
2425 /* SPARC-V9 code model support. */
2426 switch (sparc_code_model)
2428 case CM_MEDLOW:
2429 /* The range spanned by all instructions in the object is less
2430 than 2^31 bytes (2GB) and the distance from any instruction
2431 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2432 than 2^31 bytes (2GB).
2434 The executable must be in the low 4TB of the virtual address
2435 space.
2437 sethi %hi(symbol), %temp1
2438 or %temp1, %lo(symbol), %reg */
2439 if (temp)
2440 temp1 = temp; /* op0 is allowed. */
2441 else
2442 temp1 = gen_reg_rtx (DImode);
2444 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2445 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2446 break;
2448 case CM_MEDMID:
2449 /* The range spanned by all instructions in the object is less
2450 than 2^31 bytes (2GB) and the distance from any instruction
2451 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2452 than 2^31 bytes (2GB).
2454 The executable must be in the low 16TB of the virtual address
2455 space.
2457 sethi %h44(symbol), %temp1
2458 or %temp1, %m44(symbol), %temp2
2459 sllx %temp2, 12, %temp3
2460 or %temp3, %l44(symbol), %reg */
2461 if (temp)
2463 temp1 = op0;
2464 temp2 = op0;
2465 temp3 = temp; /* op0 is allowed. */
2467 else
2469 temp1 = gen_reg_rtx (DImode);
2470 temp2 = gen_reg_rtx (DImode);
2471 temp3 = gen_reg_rtx (DImode);
2474 emit_insn (gen_seth44 (temp1, op1));
2475 emit_insn (gen_setm44 (temp2, temp1, op1));
2476 emit_insn (gen_rtx_SET (temp3,
2477 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2478 emit_insn (gen_setl44 (op0, temp3, op1));
2479 break;
2481 case CM_MEDANY:
2482 /* The range spanned by all instructions in the object is less
2483 than 2^31 bytes (2GB) and the distance from any instruction
2484 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2485 than 2^31 bytes (2GB).
2487 The executable can be placed anywhere in the virtual address
2488 space.
2490 sethi %hh(symbol), %temp1
2491 sethi %lm(symbol), %temp2
2492 or %temp1, %hm(symbol), %temp3
2493 sllx %temp3, 32, %temp4
2494 or %temp4, %temp2, %temp5
2495 or %temp5, %lo(symbol), %reg */
2496 if (temp)
2498 /* It is possible that one of the registers we got for operands[2]
2499 might coincide with that of operands[0] (which is why we made
2500 it TImode). Pick the other one to use as our scratch. */
2501 if (rtx_equal_p (temp, op0))
2503 gcc_assert (ti_temp);
2504 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2506 temp1 = op0;
2507 temp2 = temp; /* op0 is _not_ allowed, see above. */
2508 temp3 = op0;
2509 temp4 = op0;
2510 temp5 = op0;
2512 else
2514 temp1 = gen_reg_rtx (DImode);
2515 temp2 = gen_reg_rtx (DImode);
2516 temp3 = gen_reg_rtx (DImode);
2517 temp4 = gen_reg_rtx (DImode);
2518 temp5 = gen_reg_rtx (DImode);
2521 emit_insn (gen_sethh (temp1, op1));
2522 emit_insn (gen_setlm (temp2, op1));
2523 emit_insn (gen_sethm (temp3, temp1, op1));
2524 emit_insn (gen_rtx_SET (temp4,
2525 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2526 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2527 emit_insn (gen_setlo (op0, temp5, op1));
2528 break;
2530 case CM_EMBMEDANY:
2531 /* Old old old backwards compatibility cruft here.
2532 Essentially it is MEDLOW with a fixed 64-bit
2533 virtual base added to all data segment addresses.
2534 Text-segment stuff is computed like MEDANY, we can't
2535 reuse the code above because the relocation knobs
2536 look different.
2538 Data segment: sethi %hi(symbol), %temp1
2539 add %temp1, EMBMEDANY_BASE_REG, %temp2
2540 or %temp2, %lo(symbol), %reg */
2541 if (data_segment_operand (op1, GET_MODE (op1)))
2543 if (temp)
2545 temp1 = temp; /* op0 is allowed. */
2546 temp2 = op0;
2548 else
2550 temp1 = gen_reg_rtx (DImode);
2551 temp2 = gen_reg_rtx (DImode);
2554 emit_insn (gen_embmedany_sethi (temp1, op1));
2555 emit_insn (gen_embmedany_brsum (temp2, temp1));
2556 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2559 /* Text segment: sethi %uhi(symbol), %temp1
2560 sethi %hi(symbol), %temp2
2561 or %temp1, %ulo(symbol), %temp3
2562 sllx %temp3, 32, %temp4
2563 or %temp4, %temp2, %temp5
2564 or %temp5, %lo(symbol), %reg */
2565 else
2567 if (temp)
2569 /* It is possible that one of the registers we got for operands[2]
2570 might coincide with that of operands[0] (which is why we made
2571 it TImode). Pick the other one to use as our scratch. */
2572 if (rtx_equal_p (temp, op0))
2574 gcc_assert (ti_temp);
2575 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2577 temp1 = op0;
2578 temp2 = temp; /* op0 is _not_ allowed, see above. */
2579 temp3 = op0;
2580 temp4 = op0;
2581 temp5 = op0;
2583 else
2585 temp1 = gen_reg_rtx (DImode);
2586 temp2 = gen_reg_rtx (DImode);
2587 temp3 = gen_reg_rtx (DImode);
2588 temp4 = gen_reg_rtx (DImode);
2589 temp5 = gen_reg_rtx (DImode);
2592 emit_insn (gen_embmedany_textuhi (temp1, op1));
2593 emit_insn (gen_embmedany_texthi (temp2, op1));
2594 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2595 emit_insn (gen_rtx_SET (temp4,
2596 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2597 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2598 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2600 break;
2602 default:
2603 gcc_unreachable ();
2607 /* These avoid problems when cross compiling. If we do not
2608 go through all this hair then the optimizer will see
2609 invalid REG_EQUAL notes or in some cases none at all. */
2610 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2611 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2612 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2613 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2615 /* The optimizer is not to assume anything about exactly
2616 which bits are set for a HIGH, they are unspecified.
2617 Unfortunately this leads to many missed optimizations
2618 during CSE. We mask out the non-HIGH bits so the result
2619 matches a plain movdi, to alleviate this problem. */
2620 static rtx
2621 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2623 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2626 static rtx
2627 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2629 return gen_rtx_SET (dest, GEN_INT (val));
2632 static rtx
2633 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2635 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2638 static rtx
2639 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2641 return gen_rtx_XOR (DImode, src, GEN_INT (val));
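 /* Example (illustrative): gen_safe_HIGH64 (temp, 0xdeadbeef) yields
    (set temp (const_int 0xdeadbc00)), i.e. the low 10 bits are already
    masked out, so CSE sees an ordinary movdi instead of an opaque HIGH. */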
2644 /* Worker routines for 64-bit constant formation on arch64.
2645 One of the key things to be doing in these emissions is
2646 to create as many temp REGs as possible. This makes it
2647 possible for half-built constants to be used later when
2648 such values are similar to something required later on.
2649 Without doing this, the optimizer cannot see such
2650 opportunities. */
2652 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2653 unsigned HOST_WIDE_INT, int);
2655 static void
2656 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2657 unsigned HOST_WIDE_INT low_bits, int is_neg)
2659 unsigned HOST_WIDE_INT high_bits;
2661 if (is_neg)
2662 high_bits = (~low_bits) & 0xffffffff;
2663 else
2664 high_bits = low_bits;
2666 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2667 if (!is_neg)
2669 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2671 else
2673 /* If we are XOR'ing with -1, then we should emit a one's complement
2674 instead. This way the combiner will notice logical operations
2675 such as ANDN later on and substitute. */
2676 if ((low_bits & 0x3ff) == 0x3ff)
2678 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2680 else
2682 emit_insn (gen_rtx_SET (op0,
2683 gen_safe_XOR64 (temp,
2684 (-(HOST_WIDE_INT)0x400
2685 | (low_bits & 0x3ff)))));
2690 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2691 unsigned HOST_WIDE_INT, int);
2693 static void
2694 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2695 unsigned HOST_WIDE_INT high_bits,
2696 unsigned HOST_WIDE_INT low_immediate,
2697 int shift_count)
2699 rtx temp2 = op0;
2701 if ((high_bits & 0xfffffc00) != 0)
2703 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2704 if ((high_bits & ~0xfffffc00) != 0)
2705 emit_insn (gen_rtx_SET (op0,
2706 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2707 else
2708 temp2 = temp;
2710 else
2712 emit_insn (gen_safe_SET64 (temp, high_bits));
2713 temp2 = temp;
2716 /* Now shift it up into place. */
2717 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2718 GEN_INT (shift_count))));
2720 /* If there is a low immediate part piece, finish up by
2721 putting that in as well. */
2722 if (low_immediate != 0)
2723 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
2726 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2727 unsigned HOST_WIDE_INT);
2729 /* Full 64-bit constant decomposition. Even though this is the
2730 'worst' case, we still optimize a few things away. */
2731 static void
2732 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2733 unsigned HOST_WIDE_INT high_bits,
2734 unsigned HOST_WIDE_INT low_bits)
2736 rtx sub_temp = op0;
2738 if (can_create_pseudo_p ())
2739 sub_temp = gen_reg_rtx (DImode);
2741 if ((high_bits & 0xfffffc00) != 0)
2743 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2744 if ((high_bits & ~0xfffffc00) != 0)
2745 emit_insn (gen_rtx_SET (sub_temp,
2746 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2747 else
2748 sub_temp = temp;
2750 else
2752 emit_insn (gen_safe_SET64 (temp, high_bits));
2753 sub_temp = temp;
2756 if (can_create_pseudo_p ())
2758 rtx temp2 = gen_reg_rtx (DImode);
2759 rtx temp3 = gen_reg_rtx (DImode);
2760 rtx temp4 = gen_reg_rtx (DImode);
2762 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2763 GEN_INT (32))));
2765 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2766 if ((low_bits & ~0xfffffc00) != 0)
2768 emit_insn (gen_rtx_SET (temp3,
2769 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2770 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2772 else
2774 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2777 else
2779 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2780 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2781 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2782 int to_shift = 12;
2784 /* We are in the middle of reload, so this is really
2785 painful. However we do still make an attempt to
2786 avoid emitting truly stupid code. */
2787 if (low1 != const0_rtx)
2789 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2790 GEN_INT (to_shift))));
2791 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2792 sub_temp = op0;
2793 to_shift = 12;
2795 else
2797 to_shift += 12;
2799 if (low2 != const0_rtx)
2801 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2802 GEN_INT (to_shift))));
2803 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2804 sub_temp = op0;
2805 to_shift = 8;
2807 else
2809 to_shift += 8;
2811 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2812 GEN_INT (to_shift))));
2813 if (low3 != const0_rtx)
2814 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2815 /* phew... */
2819 /* Analyze a 64-bit constant for certain properties. */
2820 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2821 unsigned HOST_WIDE_INT,
2822 int *, int *, int *);
2824 static void
2825 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2826 unsigned HOST_WIDE_INT low_bits,
2827 int *hbsp, int *lbsp, int *abbasp)
2829 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2830 int i;
2832 lowest_bit_set = highest_bit_set = -1;
2833 i = 0;
2836 if ((lowest_bit_set == -1)
2837 && ((low_bits >> i) & 1))
2838 lowest_bit_set = i;
2839 if ((highest_bit_set == -1)
2840 && ((high_bits >> (32 - i - 1)) & 1))
2841 highest_bit_set = (64 - i - 1);
2843 while (++i < 32
2844 && ((highest_bit_set == -1)
2845 || (lowest_bit_set == -1)));
2846 if (i == 32)
2848 i = 0;
2851 if ((lowest_bit_set == -1)
2852 && ((high_bits >> i) & 1))
2853 lowest_bit_set = i + 32;
2854 if ((highest_bit_set == -1)
2855 && ((low_bits >> (32 - i - 1)) & 1))
2856 highest_bit_set = 32 - i - 1;
2858 while (++i < 32
2859 && ((highest_bit_set == -1)
2860 || (lowest_bit_set == -1)));
2862 /* If there are no bits set this should have gone out
2863 as one instruction! */
2864 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2865 all_bits_between_are_set = 1;
2866 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2868 if (i < 32)
2870 if ((low_bits & (1 << i)) != 0)
2871 continue;
2873 else
2875 if ((high_bits & (1 << (i - 32))) != 0)
2876 continue;
2878 all_bits_between_are_set = 0;
2879 break;
2881 *hbsp = highest_bit_set;
2882 *lbsp = lowest_bit_set;
2883 *abbasp = all_bits_between_are_set;
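 /* Example (illustrative): for the constant 0x000000000000ff00 the
    routine above returns lowest_bit_set == 8, highest_bit_set == 15 and
    all_bits_between_are_set == 1, so the 2-insn "mov/sllx" sequence in
    sparc_emit_set_const64 applies. */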
2886 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2888 static int
2889 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2890 unsigned HOST_WIDE_INT low_bits)
2892 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2894 if (high_bits == 0
2895 || high_bits == 0xffffffff)
2896 return 1;
2898 analyze_64bit_constant (high_bits, low_bits,
2899 &highest_bit_set, &lowest_bit_set,
2900 &all_bits_between_are_set);
2902 if ((highest_bit_set == 63
2903 || lowest_bit_set == 0)
2904 && all_bits_between_are_set != 0)
2905 return 1;
2907 if ((highest_bit_set - lowest_bit_set) < 21)
2908 return 1;
2910 return 0;
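 /* Examples (illustrative) of constants accepted above:
      0x00000000deadbeef - high_bits == 0, loadable with sethi/or;
      0xffffffffffffff00 - highest_bit_set == 63 with all intervening
			   bits set, loadable with "mov -1; sllx 8";
      0x0000001234500000 - the set bits span fewer than 21 positions,
			   loadable with "sethi; sllx".  */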
2913 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2914 unsigned HOST_WIDE_INT,
2915 int, int);
2917 static unsigned HOST_WIDE_INT
2918 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2919 unsigned HOST_WIDE_INT low_bits,
2920 int lowest_bit_set, int shift)
2922 HOST_WIDE_INT hi, lo;
2924 if (lowest_bit_set < 32)
2926 lo = (low_bits >> lowest_bit_set) << shift;
2927 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
2929 else
2931 lo = 0;
2932 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
2934 gcc_assert (! (hi & lo));
2935 return (hi | lo);
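 /* Example (illustrative): create_simple_focus_bits (0, 0x0000ff00, 8, 0)
    returns 0xff, i.e. the set bits shifted down so that the lowest one
    lands at position SHIFT. */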
2938 /* Here we are sure to be arch64 and this is an integer constant
2939 being loaded into a register. Emit the most efficient
2940 insn sequence possible. Detection of all the 1-insn cases
2941 has been done already. */
2942 static void
2943 sparc_emit_set_const64 (rtx op0, rtx op1)
2945 unsigned HOST_WIDE_INT high_bits, low_bits;
2946 int lowest_bit_set, highest_bit_set;
2947 int all_bits_between_are_set;
2948 rtx temp = 0;
2950 /* Sanity check that we know what we are working with. */
2951 gcc_assert (TARGET_ARCH64
2952 && (GET_CODE (op0) == SUBREG
2953 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
2955 if (! can_create_pseudo_p ())
2956 temp = op0;
2958 if (GET_CODE (op1) != CONST_INT)
2960 sparc_emit_set_symbolic_const64 (op0, op1, temp);
2961 return;
2964 if (! temp)
2965 temp = gen_reg_rtx (DImode);
2967 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
2968 low_bits = (INTVAL (op1) & 0xffffffff);
2970 /* low_bits bits 0 --> 31
2971 high_bits bits 32 --> 63 */
2973 analyze_64bit_constant (high_bits, low_bits,
2974 &highest_bit_set, &lowest_bit_set,
2975 &all_bits_between_are_set);
2977 /* First try for a 2-insn sequence. */
2979 /* These situations are preferred because the optimizer can
2980 * do more things with them:
2981 * 1) mov -1, %reg
2982 * sllx %reg, shift, %reg
2983 * 2) mov -1, %reg
2984 * srlx %reg, shift, %reg
2985 * 3) mov some_small_const, %reg
2986 * sllx %reg, shift, %reg
2988 if (((highest_bit_set == 63
2989 || lowest_bit_set == 0)
2990 && all_bits_between_are_set != 0)
2991 || ((highest_bit_set - lowest_bit_set) < 12))
2993 HOST_WIDE_INT the_const = -1;
2994 int shift = lowest_bit_set;
2996 if ((highest_bit_set != 63
2997 && lowest_bit_set != 0)
2998 || all_bits_between_are_set == 0)
3000 the_const =
3001 create_simple_focus_bits (high_bits, low_bits,
3002 lowest_bit_set, 0);
3004 else if (lowest_bit_set == 0)
3005 shift = -(63 - highest_bit_set);
3007 gcc_assert (SPARC_SIMM13_P (the_const));
3008 gcc_assert (shift != 0);
3010 emit_insn (gen_safe_SET64 (temp, the_const));
3011 if (shift > 0)
3012 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3013 GEN_INT (shift))));
3014 else if (shift < 0)
3015 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3016 GEN_INT (-shift))));
3017 return;
3020 /* Now a range of 22 or fewer bits set somewhere.
3021 * 1) sethi %hi(focus_bits), %reg
3022 * sllx %reg, shift, %reg
3023 * 2) sethi %hi(focus_bits), %reg
3024 * srlx %reg, shift, %reg
3026 if ((highest_bit_set - lowest_bit_set) < 21)
3028 unsigned HOST_WIDE_INT focus_bits =
3029 create_simple_focus_bits (high_bits, low_bits,
3030 lowest_bit_set, 10);
3032 gcc_assert (SPARC_SETHI_P (focus_bits));
3033 gcc_assert (lowest_bit_set != 10);
3035 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3037 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3038 if (lowest_bit_set < 10)
3039 emit_insn (gen_rtx_SET (op0,
3040 gen_rtx_LSHIFTRT (DImode, temp,
3041 GEN_INT (10 - lowest_bit_set))));
3042 else if (lowest_bit_set > 10)
3043 emit_insn (gen_rtx_SET (op0,
3044 gen_rtx_ASHIFT (DImode, temp,
3045 GEN_INT (lowest_bit_set - 10))));
3046 return;
3049 /* 1) sethi %hi(low_bits), %reg
3050 * or %reg, %lo(low_bits), %reg
3051 * 2) sethi %hi(~low_bits), %reg
3052 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3054 if (high_bits == 0
3055 || high_bits == 0xffffffff)
3057 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3058 (high_bits == 0xffffffff));
3059 return;
3062 /* Now, try 3-insn sequences. */
3064 /* 1) sethi %hi(high_bits), %reg
3065 * or %reg, %lo(high_bits), %reg
3066 * sllx %reg, 32, %reg
3068 if (low_bits == 0)
3070 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3071 return;
3074 /* We may be able to do something quick
3075 when the constant is negated, so try that. */
3076 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3077 (~low_bits) & 0xfffffc00))
3079 /* NOTE: The trailing bits get XOR'd so we need the
3080 non-negated bits, not the negated ones. */
3081 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3083 if ((((~high_bits) & 0xffffffff) == 0
3084 && ((~low_bits) & 0x80000000) == 0)
3085 || (((~high_bits) & 0xffffffff) == 0xffffffff
3086 && ((~low_bits) & 0x80000000) != 0))
3088 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3090 if ((SPARC_SETHI_P (fast_int)
3091 && (~high_bits & 0xffffffff) == 0)
3092 || SPARC_SIMM13_P (fast_int))
3093 emit_insn (gen_safe_SET64 (temp, fast_int));
3094 else
3095 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3097 else
3099 rtx negated_const;
3100 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3101 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3102 sparc_emit_set_const64 (temp, negated_const);
3105 /* If we are XOR'ing with -1, then we should emit a one's complement
3106 instead. This way the combiner will notice logical operations
3107 such as ANDN later on and substitute. */
3108 if (trailing_bits == 0x3ff)
3110 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3112 else
3114 emit_insn (gen_rtx_SET (op0,
3115 gen_safe_XOR64 (temp,
3116 (-0x400 | trailing_bits))));
3118 return;
3121 /* 1) sethi %hi(xxx), %reg
3122 * or %reg, %lo(xxx), %reg
3123 * sllx %reg, yyy, %reg
3125 * ??? This is just a generalized version of the low_bits==0
3126 * thing above, FIXME...
3128 if ((highest_bit_set - lowest_bit_set) < 32)
3130 unsigned HOST_WIDE_INT focus_bits =
3131 create_simple_focus_bits (high_bits, low_bits,
3132 lowest_bit_set, 0);
3134 /* We can't get here in this state. */
3135 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3137 /* So what we know is that the set bits straddle the
3138 middle of the 64-bit word. */
3139 sparc_emit_set_const64_quick2 (op0, temp,
3140 focus_bits, 0,
3141 lowest_bit_set);
3142 return;
3145 /* 1) sethi %hi(high_bits), %reg
3146 * or %reg, %lo(high_bits), %reg
3147 * sllx %reg, 32, %reg
3148 * or %reg, low_bits, %reg
3150 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3152 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3153 return;
3156 /* The easiest way, when all else fails, is full decomposition. */
3157 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3160 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3162 static bool
3163 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3165 *p1 = SPARC_ICC_REG;
3166 *p2 = SPARC_FCC_REG;
3167 return true;
3170 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3172 static unsigned int
3173 sparc_min_arithmetic_precision (void)
3175 return 32;
3178 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3179 return the mode to be used for the comparison. For floating-point,
3180 CCFP[E]mode is used. CCNZmode should be used when the first operand
3181 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3182 processing is needed. */
3184 machine_mode
3185 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3187 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3189 switch (op)
3191 case EQ:
3192 case NE:
3193 case UNORDERED:
3194 case ORDERED:
3195 case UNLT:
3196 case UNLE:
3197 case UNGT:
3198 case UNGE:
3199 case UNEQ:
3200 return CCFPmode;
3202 case LT:
3203 case LE:
3204 case GT:
3205 case GE:
3206 case LTGT:
3207 return CCFPEmode;
3209 default:
3210 gcc_unreachable ();
3213 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3214 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3215 && y == const0_rtx)
3217 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3218 return CCXNZmode;
3219 else
3220 return CCNZmode;
3222 else
3224 /* This is for the cmp<mode>_sne pattern. */
3225 if (GET_CODE (x) == NOT && y == constm1_rtx)
3227 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3228 return CCXCmode;
3229 else
3230 return CCCmode;
3233 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3234 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3236 if (GET_CODE (y) == UNSPEC
3237 && (XINT (y, 1) == UNSPEC_ADDV
3238 || XINT (y, 1) == UNSPEC_SUBV
3239 || XINT (y, 1) == UNSPEC_NEGV))
3240 return CCVmode;
3241 else
3242 return CCCmode;
3245 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3246 return CCXmode;
3247 else
3248 return CCmode;
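 /* Examples (illustrative) of the mapping above for SImode operands:

	(compare (plus a b) (const_int 0))  -> CCNZmode
	(compare (not a) (const_int -1))    -> CCCmode
	(lt (reg:DF f) (reg:DF g))          -> CCFPEmode
	(unlt (reg:DF f) (reg:DF g))        -> CCFPmode
	(compare a b)                       -> CCmode

    The DImode variants on 64-bit targets map to CCXNZmode, CCXCmode
    and CCXmode respectively. */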
3252 /* Emit the compare insn and return the CC reg for a CODE comparison
3253 with operands X and Y. */
3255 static rtx
3256 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3258 machine_mode mode;
3259 rtx cc_reg;
3261 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3262 return x;
3264 mode = SELECT_CC_MODE (code, x, y);
3266 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3267 fcc regs (cse can't tell they're really call clobbered regs and will
3268 remove a duplicate comparison even if there is an intervening function
3269 call - it will then try to reload the cc reg via an int reg which is why
3270 we need the movcc patterns). It is possible to provide the movcc
3271 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3272 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3273 to tell cse that CCFPE mode registers (even pseudos) are call
3274 clobbered. */
3276 /* ??? This is an experiment. Rather than making changes to cse which may
3277 or may not be easy/clean, we do our own cse. This is possible because
3278 we will generate hard registers. Cse knows they're call clobbered (it
3279 doesn't know the same thing about pseudos). If we guess wrong, no big
3280 deal, but if we win, great! */
3282 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3283 #if 1 /* experiment */
3285 int reg;
3286 /* We cycle through the registers to ensure they're all exercised. */
3287 static int next_fcc_reg = 0;
3288 /* Previous x,y for each fcc reg. */
3289 static rtx prev_args[4][2];
3291 /* Scan prev_args for x,y. */
3292 for (reg = 0; reg < 4; reg++)
3293 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3294 break;
3295 if (reg == 4)
3297 reg = next_fcc_reg;
3298 prev_args[reg][0] = x;
3299 prev_args[reg][1] = y;
3300 next_fcc_reg = (next_fcc_reg + 1) & 3;
3302 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3304 #else
3305 cc_reg = gen_reg_rtx (mode);
3306 #endif /* ! experiment */
3307 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3308 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3309 else
3310 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3312 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3313 will only result in an unrecognizable insn so no point in asserting. */
3314 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3316 return cc_reg;
3320 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3323 gen_compare_reg (rtx cmp)
3325 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3328 /* This function is used for v9 only.
3329 DEST is the target of the Scc insn.
3330 CODE is the code for an Scc's comparison.
3331 X and Y are the values we compare.
3333 This function is needed to turn
3335 (set (reg:SI 110)
3336 (gt (reg:CCX 100 %icc)
3337 (const_int 0)))
3338 into
3339 (set (reg:SI 110)
3340 (gt:DI (reg:CCX 100 %icc)
3341 (const_int 0)))
3343 That is, the instruction recognizer needs to see the mode of the comparison to
3344 find the right instruction. We could use "gt:DI" right in the
3345 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3347 static int
3348 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3350 if (! TARGET_ARCH64
3351 && (GET_MODE (x) == DImode
3352 || GET_MODE (dest) == DImode))
3353 return 0;
3355 /* Try to use the movrCC insns. */
3356 if (TARGET_ARCH64
3357 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3358 && y == const0_rtx
3359 && v9_regcmp_p (compare_code))
3361 rtx op0 = x;
3362 rtx temp;
3364 /* Special case for op0 != 0. This can be done with one instruction if
3365 dest == x. */
3367 if (compare_code == NE
3368 && GET_MODE (dest) == DImode
3369 && rtx_equal_p (op0, dest))
3371 emit_insn (gen_rtx_SET (dest,
3372 gen_rtx_IF_THEN_ELSE (DImode,
3373 gen_rtx_fmt_ee (compare_code, DImode,
3374 op0, const0_rtx),
3375 const1_rtx,
3376 dest)));
3377 return 1;
3380 if (reg_overlap_mentioned_p (dest, op0))
3382 /* Handle the case where dest == x.
3383 We "early clobber" the result. */
3384 op0 = gen_reg_rtx (GET_MODE (x));
3385 emit_move_insn (op0, x);
3388 emit_insn (gen_rtx_SET (dest, const0_rtx));
3389 if (GET_MODE (op0) != DImode)
3391 temp = gen_reg_rtx (DImode);
3392 convert_move (temp, op0, 0);
3394 else
3395 temp = op0;
3396 emit_insn (gen_rtx_SET (dest,
3397 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3398 gen_rtx_fmt_ee (compare_code, DImode,
3399 temp, const0_rtx),
3400 const1_rtx,
3401 dest)));
3402 return 1;
3404 else
3406 x = gen_compare_reg_1 (compare_code, x, y);
3407 y = const0_rtx;
3409 emit_insn (gen_rtx_SET (dest, const0_rtx));
3410 emit_insn (gen_rtx_SET (dest,
3411 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3412 gen_rtx_fmt_ee (compare_code,
3413 GET_MODE (x), x, y),
3414 const1_rtx, dest)));
3415 return 1;
3420 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3421 without jumps using the addx/subx instructions. */
3423 bool
3424 emit_scc_insn (rtx operands[])
3426 rtx tem, x, y;
3427 enum rtx_code code;
3428 machine_mode mode;
3430 /* The quad-word fp compare library routines all return nonzero to indicate
3431 true, which is different from the equivalent libgcc routines, so we must
3432 handle them specially here. */
3433 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3435 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3436 GET_CODE (operands[1]));
3437 operands[2] = XEXP (operands[1], 0);
3438 operands[3] = XEXP (operands[1], 1);
3441 code = GET_CODE (operands[1]);
3442 x = operands[2];
3443 y = operands[3];
3444 mode = GET_MODE (x);
3446 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3447 more applications). The exception to this is "reg != 0" which can
3448 be done in one instruction on v9 (so we do it). */
3449 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3451 if (y != const0_rtx)
3452 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3454 rtx pat = gen_rtx_SET (operands[0],
3455 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3456 x, const0_rtx));
3458 /* If we can use addx/subx or addxc, add a clobber for CC. */
3459 if (mode == SImode || (code == NE && TARGET_VIS3))
3461 rtx clobber
3462 = gen_rtx_CLOBBER (VOIDmode,
3463 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3464 SPARC_ICC_REG));
3465 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3468 emit_insn (pat);
3469 return true;
3472 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3473 if (TARGET_ARCH64
3474 && mode == DImode
3475 && !((code == LTU || code == GTU) && TARGET_VIS3)
3476 && gen_v9_scc (operands[0], code, x, y))
3477 return true;
3479 /* We can do LTU and GEU using the addx/subx instructions too. And
3480 for GTU/LEU, if both operands are registers swap them and fall
3481 back to the easy case. */
3482 if (code == GTU || code == LEU)
3484 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3485 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3487 tem = x;
3488 x = y;
3489 y = tem;
3490 code = swap_condition (code);
3494 if (code == LTU || code == GEU)
3496 emit_insn (gen_rtx_SET (operands[0],
3497 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3498 gen_compare_reg_1 (code, x, y),
3499 const0_rtx)));
3500 return true;
3503 /* All the possibilities to use addx/subx based sequences have been
3504 exhausted, try for a 3-instruction sequence using v9 conditional
3505 moves. */
3506 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3507 return true;
3509 /* Nope, do branches. */
3510 return false;
3513 /* Emit a conditional jump insn for the v9 architecture using comparison code
3514 CODE and jump target LABEL.
3515 This function exists to take advantage of the v9 brxx insns. */
3517 static void
3518 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3520 emit_jump_insn (gen_rtx_SET (pc_rtx,
3521 gen_rtx_IF_THEN_ELSE (VOIDmode,
3522 gen_rtx_fmt_ee (code, GET_MODE (op0),
3523 op0, const0_rtx),
3524 gen_rtx_LABEL_REF (VOIDmode, label),
3525 pc_rtx)));
3528 /* Emit a conditional jump insn for the UA2011 architecture using
3529 comparison code CODE and jump target LABEL. This function exists
3530 to take advantage of the UA2011 Compare and Branch insns. */
3532 static void
3533 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3535 rtx if_then_else;
3537 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3538 gen_rtx_fmt_ee(code, GET_MODE(op0),
3539 op0, op1),
3540 gen_rtx_LABEL_REF (VOIDmode, label),
3541 pc_rtx);
3543 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3546 void
3547 emit_conditional_branch_insn (rtx operands[])
3549 /* The quad-word fp compare library routines all return nonzero to indicate
3550 true, which is different from the equivalent libgcc routines, so we must
3551 handle them specially here. */
3552 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3554 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3555 GET_CODE (operands[0]));
3556 operands[1] = XEXP (operands[0], 0);
3557 operands[2] = XEXP (operands[0], 1);
3560 /* If we can tell early on that the comparison is against a constant
3561 that won't fit in the 5-bit signed immediate field of a cbcond,
3562 use one of the other v9 conditional branch sequences. */
3563 if (TARGET_CBCOND
3564 && GET_CODE (operands[1]) == REG
3565 && (GET_MODE (operands[1]) == SImode
3566 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3567 && (GET_CODE (operands[2]) != CONST_INT
3568 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3570 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3571 return;
3574 if (TARGET_ARCH64 && operands[2] == const0_rtx
3575 && GET_CODE (operands[1]) == REG
3576 && GET_MODE (operands[1]) == DImode)
3578 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3579 return;
3582 operands[1] = gen_compare_reg (operands[0]);
3583 operands[2] = const0_rtx;
3584 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3585 operands[1], operands[2]);
3586 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3587 operands[3]));
3591 /* Generate a DFmode part of a hard TFmode register.
3592 REG is the TFmode hard register, LOW is 1 for the
3593 low 64 bits of the register and 0 otherwise.
3596 gen_df_reg (rtx reg, int low)
3598 int regno = REGNO (reg);
3600 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3601 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3602 return gen_rtx_REG (DFmode, regno);
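 /* Example (illustrative): on big-endian SPARC (WORDS_BIG_ENDIAN == 1),
    for a TFmode value in %f0..%f3, gen_df_reg (reg, 0) returns %f0 (the
    high 64 bits) and gen_df_reg (reg, 1) returns %f2 (the low 64 bits). */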
3605 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3606 Unlike normal calls, TFmode operands are passed by reference. It is
3607 assumed that no more than 3 operands are required. */
3609 static void
3610 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3612 rtx ret_slot = NULL, arg[3], func_sym;
3613 int i;
3615 /* We only expect to be called for conversions, unary, and binary ops. */
3616 gcc_assert (nargs == 2 || nargs == 3);
3618 for (i = 0; i < nargs; ++i)
3620 rtx this_arg = operands[i];
3621 rtx this_slot;
3623 /* TFmode arguments and return values are passed by reference. */
3624 if (GET_MODE (this_arg) == TFmode)
3626 int force_stack_temp;
3628 force_stack_temp = 0;
3629 if (TARGET_BUGGY_QP_LIB && i == 0)
3630 force_stack_temp = 1;
3632 if (GET_CODE (this_arg) == MEM
3633 && ! force_stack_temp)
3635 tree expr = MEM_EXPR (this_arg);
3636 if (expr)
3637 mark_addressable (expr);
3638 this_arg = XEXP (this_arg, 0);
3640 else if (CONSTANT_P (this_arg)
3641 && ! force_stack_temp)
3643 this_slot = force_const_mem (TFmode, this_arg);
3644 this_arg = XEXP (this_slot, 0);
3646 else
3648 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3650 /* Operand 0 is the return value. We'll copy it out later. */
3651 if (i > 0)
3652 emit_move_insn (this_slot, this_arg);
3653 else
3654 ret_slot = this_slot;
3656 this_arg = XEXP (this_slot, 0);
3660 arg[i] = this_arg;
3663 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3665 if (GET_MODE (operands[0]) == TFmode)
3667 if (nargs == 2)
3668 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3669 arg[0], GET_MODE (arg[0]),
3670 arg[1], GET_MODE (arg[1]));
3671 else
3672 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3673 arg[0], GET_MODE (arg[0]),
3674 arg[1], GET_MODE (arg[1]),
3675 arg[2], GET_MODE (arg[2]));
3677 if (ret_slot)
3678 emit_move_insn (operands[0], ret_slot);
3680 else
3682 rtx ret;
3684 gcc_assert (nargs == 2);
3686 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3687 GET_MODE (operands[0]),
3688 arg[1], GET_MODE (arg[1]));
3690 if (ret != operands[0])
3691 emit_move_insn (operands[0], ret);
3695 /* Expand soft-float TFmode calls to sparc abi routines. */
3697 static void
3698 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3700 const char *func;
3702 switch (code)
3704 case PLUS:
3705 func = "_Qp_add";
3706 break;
3707 case MINUS:
3708 func = "_Qp_sub";
3709 break;
3710 case MULT:
3711 func = "_Qp_mul";
3712 break;
3713 case DIV:
3714 func = "_Qp_div";
3715 break;
3716 default:
3717 gcc_unreachable ();
3720 emit_soft_tfmode_libcall (func, 3, operands);
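 /* Example (illustrative): with !TARGET_HARD_QUAD, a TFmode addition
    c = a + b is lowered by the routines above into roughly

	_Qp_add (&c, &a, &b);

    with all three TFmode values passed by reference, per the convention
    documented in emit_soft_tfmode_libcall. */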
3723 static void
3724 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3726 const char *func;
3728 gcc_assert (code == SQRT);
3729 func = "_Qp_sqrt";
3731 emit_soft_tfmode_libcall (func, 2, operands);
3734 static void
3735 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3737 const char *func;
3739 switch (code)
3741 case FLOAT_EXTEND:
3742 switch (GET_MODE (operands[1]))
3744 case E_SFmode:
3745 func = "_Qp_stoq";
3746 break;
3747 case E_DFmode:
3748 func = "_Qp_dtoq";
3749 break;
3750 default:
3751 gcc_unreachable ();
3753 break;
3755 case FLOAT_TRUNCATE:
3756 switch (GET_MODE (operands[0]))
3758 case E_SFmode:
3759 func = "_Qp_qtos";
3760 break;
3761 case E_DFmode:
3762 func = "_Qp_qtod";
3763 break;
3764 default:
3765 gcc_unreachable ();
3767 break;
3769 case FLOAT:
3770 switch (GET_MODE (operands[1]))
3772 case E_SImode:
3773 func = "_Qp_itoq";
3774 if (TARGET_ARCH64)
3775 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3776 break;
3777 case E_DImode:
3778 func = "_Qp_xtoq";
3779 break;
3780 default:
3781 gcc_unreachable ();
3783 break;
3785 case UNSIGNED_FLOAT:
3786 switch (GET_MODE (operands[1]))
3788 case E_SImode:
3789 func = "_Qp_uitoq";
3790 if (TARGET_ARCH64)
3791 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3792 break;
3793 case E_DImode:
3794 func = "_Qp_uxtoq";
3795 break;
3796 default:
3797 gcc_unreachable ();
3799 break;
3801 case FIX:
3802 switch (GET_MODE (operands[0]))
3804 case E_SImode:
3805 func = "_Qp_qtoi";
3806 break;
3807 case E_DImode:
3808 func = "_Qp_qtox";
3809 break;
3810 default:
3811 gcc_unreachable ();
3813 break;
3815 case UNSIGNED_FIX:
3816 switch (GET_MODE (operands[0]))
3818 case E_SImode:
3819 func = "_Qp_qtoui";
3820 break;
3821 case E_DImode:
3822 func = "_Qp_qtoux";
3823 break;
3824 default:
3825 gcc_unreachable ();
3827 break;
3829 default:
3830 gcc_unreachable ();
3833 emit_soft_tfmode_libcall (func, 2, operands);
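/* Illustrative sketch: the conversion routines selected above follow a
   _Qp_<src>to<dst> naming scheme, where s, d and q stand for SFmode,
   DFmode and TFmode, and i, x, ui and ux for signed/unsigned 32/64-bit
   integers.  For example, the C cast

     unsigned long u = (unsigned long) ld;    // TFmode -> DImode

   ends up calling _Qp_qtoux.  */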
3836 /* Expand a hard-float TFmode operation. All arguments must be in
3837 registers. */
3839 static void
3840 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3842 rtx op, dest;
3844 if (GET_RTX_CLASS (code) == RTX_UNARY)
3846 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3847 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3849 else
3851 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3852 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3853 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3854 operands[1], operands[2]);
3857 if (register_operand (operands[0], VOIDmode))
3858 dest = operands[0];
3859 else
3860 dest = gen_reg_rtx (GET_MODE (operands[0]));
3862 emit_insn (gen_rtx_SET (dest, op));
3864 if (dest != operands[0])
3865 emit_move_insn (operands[0], dest);
3868 void
3869 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3871 if (TARGET_HARD_QUAD)
3872 emit_hard_tfmode_operation (code, operands);
3873 else
3874 emit_soft_tfmode_binop (code, operands);
3877 void
3878 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3880 if (TARGET_HARD_QUAD)
3881 emit_hard_tfmode_operation (code, operands);
3882 else
3883 emit_soft_tfmode_unop (code, operands);
3886 void
3887 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3889 if (TARGET_HARD_QUAD)
3890 emit_hard_tfmode_operation (code, operands);
3891 else
3892 emit_soft_tfmode_cvt (code, operands);
3895 /* Return nonzero if a branch/jump/call instruction will emit
3896 a nop into its delay slot. */
3899 empty_delay_slot (rtx_insn *insn)
3901 rtx seq;
3903 /* If no previous instruction (should not happen), return true. */
3904 if (PREV_INSN (insn) == NULL)
3905 return 1;
3907 seq = NEXT_INSN (PREV_INSN (insn));
3908 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3909 return 0;
3911 return 1;
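/* For reference, a hedged sketch of the RTL involved: once the delayed-branch
   pass has filled a slot, the branch and its slot insn are packaged as

     (insn (sequence [(jump_insn ...) (insn ...slot insn...)]))

   so finding no SEQUENCE here means INSN kept an empty slot and final
   will have to emit the nop itself.  */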
3914 /* Return nonzero if we should emit a nop after a cbcond instruction.
3915 The cbcond instruction does not have a delay slot, however there is
3916 a severe performance penalty if a control transfer appears right
3917 after a cbcond. Therefore we emit a nop when we detect this
3918 situation. */
3921 emit_cbcond_nop (rtx_insn *insn)
3923 rtx next = next_active_insn (insn);
3925 if (!next)
3926 return 1;
3928 if (NONJUMP_INSN_P (next)
3929 && GET_CODE (PATTERN (next)) == SEQUENCE)
3930 next = XVECEXP (PATTERN (next), 0, 0);
3931 else if (CALL_P (next)
3932 && GET_CODE (PATTERN (next)) == PARALLEL)
3934 rtx delay = XVECEXP (PATTERN (next), 0, 1);
3936 if (GET_CODE (delay) == RETURN)
3938 /* It's a sibling call. Do not emit the nop if we're going
3939 to emit something other than the jump itself as the first
3940 instruction of the sibcall sequence. */
3941 if (sparc_leaf_function_p || TARGET_FLAT)
3942 return 0;
3946 if (NONJUMP_INSN_P (next))
3947 return 0;
3949 return 1;
3952 /* Return nonzero if TRIAL can go into the call delay slot. */
3955 eligible_for_call_delay (rtx_insn *trial)
3957 rtx pat;
3959 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
3960 return 0;
3962 /* The only problematic cases are TLS sequences with Sun as/ld. */
3963 if ((TARGET_GNU_TLS && HAVE_GNU_LD) || !TARGET_TLS)
3964 return 1;
3966 pat = PATTERN (trial);
3968 /* We must reject tgd_add{32|64}, i.e.
3969 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSGD)))
3970 and tldm_add{32|64}, i.e.
3971 (set (reg) (plus (reg) (unspec [(reg) (symbol_ref)] UNSPEC_TLSLDM)))
3972 for Sun as/ld. */
3973 if (GET_CODE (pat) == SET
3974 && GET_CODE (SET_SRC (pat)) == PLUS)
3976 rtx unspec = XEXP (SET_SRC (pat), 1);
3978 if (GET_CODE (unspec) == UNSPEC
3979 && (XINT (unspec, 1) == UNSPEC_TLSGD
3980 || XINT (unspec, 1) == UNSPEC_TLSLDM))
3981 return 0;
3984 return 1;
3987 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
3988 instruction. RETURN_P is true if the v9 variant 'return' is to be
3989 considered in the test too.
3991 TRIAL must be a SET whose destination is a REG appropriate for the
3992 'restore' instruction or, if RETURN_P is true, for the 'return'
3993 instruction. */
3995 static int
3996 eligible_for_restore_insn (rtx trial, bool return_p)
3998 rtx pat = PATTERN (trial);
3999 rtx src = SET_SRC (pat);
4000 bool src_is_freg = false;
4001 rtx src_reg;
4003 /* Since we can now do moves between float and integer registers when
4004 VIS3 is enabled, we have to catch this case. We can allow such
4005 moves when doing a 'return', however. */
4006 src_reg = src;
4007 if (GET_CODE (src_reg) == SUBREG)
4008 src_reg = SUBREG_REG (src_reg);
4009 if (GET_CODE (src_reg) == REG
4010 && SPARC_FP_REG_P (REGNO (src_reg)))
4011 src_is_freg = true;
4013 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4014 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4015 && arith_operand (src, GET_MODE (src))
4016 && ! src_is_freg)
4018 if (TARGET_ARCH64)
4019 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4020 else
4021 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4024 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4025 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4026 && arith_double_operand (src, GET_MODE (src))
4027 && ! src_is_freg)
4028 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4030 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4031 else if (! TARGET_FPU && register_operand (src, SFmode))
4032 return 1;
4034 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4035 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4036 return 1;
4038 /* If we have the 'return' instruction, anything that does not use
4039 local or output registers and can go into a delay slot wins. */
4040 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4041 return 1;
4043 /* The 'restore src1,src2,dest' pattern for SImode. */
4044 else if (GET_CODE (src) == PLUS
4045 && register_operand (XEXP (src, 0), SImode)
4046 && arith_operand (XEXP (src, 1), SImode))
4047 return 1;
4049 /* The 'restore src1,src2,dest' pattern for DImode. */
4050 else if (GET_CODE (src) == PLUS
4051 && register_operand (XEXP (src, 0), DImode)
4052 && arith_double_operand (XEXP (src, 1), DImode))
4053 return 1;
4055 /* The 'restore src1,%lo(src2),dest' pattern. */
4056 else if (GET_CODE (src) == LO_SUM
4057 && ! TARGET_CM_MEDMID
4058 && ((register_operand (XEXP (src, 0), SImode)
4059 && immediate_operand (XEXP (src, 1), SImode))
4060 || (TARGET_ARCH64
4061 && register_operand (XEXP (src, 0), DImode)
4062 && immediate_operand (XEXP (src, 1), DImode))))
4063 return 1;
4065 /* The 'restore src,src,dest' pattern. */
4066 else if (GET_CODE (src) == ASHIFT
4067 && (register_operand (XEXP (src, 0), SImode)
4068 || register_operand (XEXP (src, 0), DImode))
4069 && XEXP (src, 1) == const1_rtx)
4070 return 1;
4072 return 0;
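/* Illustrative example: in a regular register-window function

     int add (int a, int b) { return a + b; }

   the addition can be folded into the epilogue as

     ret
      restore %i0, %i1, %o0

   which is the 'restore src1,src2,dest' pattern accepted above (sketch;
   the actual register assignment is up to the allocator).  */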
4075 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4078 eligible_for_return_delay (rtx_insn *trial)
4080 int regno;
4081 rtx pat;
4083 /* If the function uses __builtin_eh_return, the eh_return machinery
4084 occupies the delay slot. */
4085 if (crtl->calls_eh_return)
4086 return 0;
4088 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4089 return 0;
4091 /* In the case of a leaf or flat function, anything can go into the slot. */
4092 if (sparc_leaf_function_p || TARGET_FLAT)
4093 return 1;
4095 if (!NONJUMP_INSN_P (trial))
4096 return 0;
4098 pat = PATTERN (trial);
4099 if (GET_CODE (pat) == PARALLEL)
4101 int i;
4103 if (! TARGET_V9)
4104 return 0;
4105 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4107 rtx expr = XVECEXP (pat, 0, i);
4108 if (GET_CODE (expr) != SET)
4109 return 0;
4110 if (GET_CODE (SET_DEST (expr)) != REG)
4111 return 0;
4112 regno = REGNO (SET_DEST (expr));
4113 if (regno >= 8 && regno < 24)
4114 return 0;
4116 return !epilogue_renumber (&pat, 1);
4119 if (GET_CODE (pat) != SET)
4120 return 0;
4122 if (GET_CODE (SET_DEST (pat)) != REG)
4123 return 0;
4125 regno = REGNO (SET_DEST (pat));
4127 /* Otherwise, only operations which can be done in tandem with
4128 a `restore' or `return' insn can go into the delay slot. */
4129 if (regno >= 8 && regno < 24)
4130 return 0;
4132 /* If this instruction sets up a floating-point register and we have a return
4133 instruction, it can probably go in. But 'restore' will not work
4134 with FP_REGS. */
4135 if (! SPARC_INT_REG_P (regno))
4136 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4138 return eligible_for_restore_insn (trial, true);
4141 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4144 eligible_for_sibcall_delay (rtx_insn *trial)
4146 rtx pat;
4148 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4149 return 0;
4151 if (!NONJUMP_INSN_P (trial))
4152 return 0;
4154 pat = PATTERN (trial);
4156 if (sparc_leaf_function_p || TARGET_FLAT)
4158 /* If the tail call is done using the call instruction,
4159 we have to restore %o7 in the delay slot. */
4160 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4161 return 0;
4163 /* %g1 is used to build the function address. */
4164 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4165 return 0;
4167 return 1;
4170 if (GET_CODE (pat) != SET)
4171 return 0;
4173 /* Otherwise, only operations which can be done in tandem with
4174 a `restore' insn can go into the delay slot. */
4175 if (GET_CODE (SET_DEST (pat)) != REG
4176 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4177 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4178 return 0;
4180 /* If it mentions %o7, it can't go in, because sibcall will clobber it
4181 in most cases. */
4182 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4183 return 0;
4185 return eligible_for_restore_insn (trial, false);
4188 /* Determine if it's legal to put X into the constant pool. This
4189 is not possible if X contains the address of a symbol that is
4190 not constant (TLS) or not known at final link time (PIC). */
4192 static bool
4193 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4195 switch (GET_CODE (x))
4197 case CONST_INT:
4198 case CONST_WIDE_INT:
4199 case CONST_DOUBLE:
4200 case CONST_VECTOR:
4201 /* Accept all non-symbolic constants. */
4202 return false;
4204 case LABEL_REF:
4205 /* Labels are OK iff we are non-PIC. */
4206 return flag_pic != 0;
4208 case SYMBOL_REF:
4209 /* 'Naked' TLS symbol references are never OK,
4210 non-TLS symbols are OK iff we are non-PIC. */
4211 if (SYMBOL_REF_TLS_MODEL (x))
4212 return true;
4213 else
4214 return flag_pic != 0;
4216 case CONST:
4217 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4218 case PLUS:
4219 case MINUS:
4220 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4221 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4222 case UNSPEC:
4223 return true;
4224 default:
4225 gcc_unreachable ();
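/* For example, given

     __thread int t;

   the address of 't' is a TLS SYMBOL_REF and is rejected above: its value
   is only determined at run time, per thread, so it cannot live in the
   constant pool.  */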
4229 /* Global Offset Table support. */
4230 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4231 static GTY(()) rtx got_register_rtx = NULL_RTX;
4232 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4234 static GTY(()) bool got_helper_needed = false;
4236 /* Return the SYMBOL_REF for the Global Offset Table. */
4238 static rtx
4239 sparc_got (void)
4241 if (!got_symbol_rtx)
4242 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4244 return got_symbol_rtx;
4247 /* Wrapper around the load_pcrel_sym{si,di} patterns. */
4249 static rtx
4250 gen_load_pcrel_sym (rtx op0, rtx op1, rtx op2)
4252 int orig_flag_pic = flag_pic;
4253 rtx insn;
4255 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4256 flag_pic = 0;
4257 if (TARGET_ARCH64)
4258 insn = gen_load_pcrel_symdi (op0, op1, op2, GEN_INT (REGNO (op0)));
4259 else
4260 insn = gen_load_pcrel_symsi (op0, op1, op2, GEN_INT (REGNO (op0)));
4261 flag_pic = orig_flag_pic;
4263 return insn;
4266 /* Output the load_pcrel_sym{si,di} patterns. */
4268 const char *
4269 output_load_pcrel_sym (rtx *operands)
4271 if (flag_delayed_branch)
4273 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4274 output_asm_insn ("call\t%a2", operands);
4275 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4277 else
4279 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4280 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4281 output_asm_insn ("call\t%a2", operands);
4282 output_asm_insn (" nop", NULL);
4285 if (operands[2] == got_helper_rtx)
4286 got_helper_needed = true;
4288 return "";
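/* With delayed branches enabled, the strings above produce for the GOT

     sethi  %hi(_GLOBAL_OFFSET_TABLE_-4), %l7
     call   __sparc_get_pc_thunk.l7
      add   %l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   where the -4/+4 adjustments account for the PC being sampled at the
   call, one word after the sethi (illustrative; %l7 assumes the default
   PIC register).  */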
4291 #ifdef HAVE_GAS_HIDDEN
4292 # define USE_HIDDEN_LINKONCE 1
4293 #else
4294 # define USE_HIDDEN_LINKONCE 0
4295 #endif
4297 /* Emit code to load the GOT register. */
4299 void
4300 load_got_register (void)
4302 rtx insn;
4304 if (TARGET_VXWORKS_RTP)
4306 if (!got_register_rtx)
4307 got_register_rtx = pic_offset_table_rtx;
4309 insn = gen_vxworks_load_got ();
4311 else
4313 if (!got_register_rtx)
4314 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4316 /* The GOT symbol is subject to a PC-relative relocation so we need a
4317 helper function to add the PC value and thus get the final value. */
4318 if (!got_helper_rtx)
4320 char name[32];
4322 /* Skip the leading '%' as that cannot be used in a symbol name. */
4323 if (USE_HIDDEN_LINKONCE)
4324 sprintf (name, "__sparc_get_pc_thunk.%s",
4325 reg_names[REGNO (got_register_rtx)] + 1);
4326 else
4327 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4328 REGNO (got_register_rtx));
4330 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4333 insn
4334 = gen_load_pcrel_sym (got_register_rtx, sparc_got (), got_helper_rtx);
4337 emit_insn (insn);
4340 /* Ensure that we are not using patterns that are not OK with PIC. */
4343 check_pic (int i)
4345 rtx op;
4347 switch (flag_pic)
4349 case 1:
4350 op = recog_data.operand[i];
4351 gcc_assert (GET_CODE (op) != SYMBOL_REF
4352 && (GET_CODE (op) != CONST
4353 || (GET_CODE (XEXP (op, 0)) == MINUS
4354 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4355 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4356 /* fallthrough */
4357 case 2:
4358 default:
4359 return 1;
4363 /* Return true if X is an address which needs a temporary register when
4364 reloaded while generating PIC code. */
4367 pic_address_needs_scratch (rtx x)
4369 /* An address which is a symbolic plus a non SMALL_INT needs a temp reg. */
4370 if (GET_CODE (x) == CONST
4371 && GET_CODE (XEXP (x, 0)) == PLUS
4372 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4373 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4374 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4375 return 1;
4377 return 0;
4380 /* Determine if a given RTX is a valid constant. We already know this
4381 satisfies CONSTANT_P. */
4383 static bool
4384 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4386 switch (GET_CODE (x))
4388 case CONST:
4389 case SYMBOL_REF:
4390 if (sparc_tls_referenced_p (x))
4391 return false;
4392 break;
4394 case CONST_DOUBLE:
4395 /* Floating point constants are generally not ok.
4396 The only exception is 0.0 and all-ones in VIS. */
4397 if (TARGET_VIS
4398 && SCALAR_FLOAT_MODE_P (mode)
4399 && (const_zero_operand (x, mode)
4400 || const_all_ones_operand (x, mode)))
4401 return true;
4403 return false;
4405 case CONST_VECTOR:
4406 /* Vector constants are generally not ok.
4407 The only exception is 0 or -1 in VIS. */
4408 if (TARGET_VIS
4409 && (const_zero_operand (x, mode)
4410 || const_all_ones_operand (x, mode)))
4411 return true;
4413 return false;
4415 default:
4416 break;
4419 return true;
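/* E.g. (const_double:DF 0.0) or an all-ones vector constant is accepted
   when VIS is available because such values can be synthesized directly
   in an FP register (fzero/fone); every other FP or vector constant has
   to be loaded from memory.  */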
4422 /* Determine if a given RTX is a valid constant address. */
4424 bool
4425 constant_address_p (rtx x)
4427 switch (GET_CODE (x))
4429 case LABEL_REF:
4430 case CONST_INT:
4431 case HIGH:
4432 return true;
4434 case CONST:
4435 if (flag_pic && pic_address_needs_scratch (x))
4436 return false;
4437 return sparc_legitimate_constant_p (Pmode, x);
4439 case SYMBOL_REF:
4440 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4442 default:
4443 return false;
4447 /* Nonzero if the constant value X is a legitimate general operand
4448 when generating PIC code. It is given that flag_pic is on and
4449 that X satisfies CONSTANT_P. */
4451 bool
4452 legitimate_pic_operand_p (rtx x)
4454 if (pic_address_needs_scratch (x))
4455 return false;
4456 if (sparc_tls_referenced_p (x))
4457 return false;
4458 return true;
4461 /* Return true if X is a representation of the PIC register. */
4463 static bool
4464 sparc_pic_register_p (rtx x)
4466 if (!REG_P (x) || !pic_offset_table_rtx)
4467 return false;
4469 if (x == pic_offset_table_rtx)
4470 return true;
4472 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4473 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4474 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4475 return true;
4477 return false;
4480 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4481 (CONST_INT_P (X) \
4482 && INTVAL (X) >= -0x1000 \
4483 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4485 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4486 (CONST_INT_P (X) \
4487 && INTVAL (X) >= -0x1000 \
4488 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
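/* Worked example: for MODE == DImode (size 8), RTX_OK_FOR_OFFSET_P accepts
   -0x1000 .. 0xff8, so that OFFSET + 7, the last byte accessed, still fits
   in the signed 13-bit immediate field.  RTX_OK_FOR_OLO10_P stops at
   0xc00 - 8 because a 10-bit %lo() part (up to 0x3ff) is folded into the
   same field.  */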
4490 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4492 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4493 ordinarily. This changes a bit when generating PIC. */
4495 static bool
4496 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4498 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4500 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4501 rs1 = addr;
4502 else if (GET_CODE (addr) == PLUS)
4504 rs1 = XEXP (addr, 0);
4505 rs2 = XEXP (addr, 1);
4507 /* Canonicalize. REG comes first, if there are no regs,
4508 LO_SUM comes first. */
4509 if (!REG_P (rs1)
4510 && GET_CODE (rs1) != SUBREG
4511 && (REG_P (rs2)
4512 || GET_CODE (rs2) == SUBREG
4513 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4515 rs1 = XEXP (addr, 1);
4516 rs2 = XEXP (addr, 0);
4519 if ((flag_pic == 1
4520 && sparc_pic_register_p (rs1)
4521 && !REG_P (rs2)
4522 && GET_CODE (rs2) != SUBREG
4523 && GET_CODE (rs2) != LO_SUM
4524 && GET_CODE (rs2) != MEM
4525 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4526 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4527 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4528 || ((REG_P (rs1)
4529 || GET_CODE (rs1) == SUBREG)
4530 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4532 imm1 = rs2;
4533 rs2 = NULL;
4535 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4536 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4538 /* We prohibit REG + REG for TFmode when there are no quad move insns
4539 and we consequently need to split. We do this because REG+REG
4540 is not an offsettable address. If we get the situation in reload
4541 where source and destination of a movtf pattern are both MEMs with
4542 REG+REG address, then only one of them gets converted to an
4543 offsettable address. */
4544 if (mode == TFmode
4545 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4546 return 0;
4548 /* Likewise for TImode, but in all cases. */
4549 if (mode == TImode)
4550 return 0;
4552 /* We prohibit REG + REG on ARCH32 if not optimizing for
4553 DFmode/DImode because then mem_min_alignment is likely to be zero
4554 after reload and the forced split would lack a matching splitter
4555 pattern. */
4556 if (TARGET_ARCH32 && !optimize
4557 && (mode == DFmode || mode == DImode))
4558 return 0;
4560 else if (USE_AS_OFFSETABLE_LO10
4561 && GET_CODE (rs1) == LO_SUM
4562 && TARGET_ARCH64
4563 && ! TARGET_CM_MEDMID
4564 && RTX_OK_FOR_OLO10_P (rs2, mode))
4566 rs2 = NULL;
4567 imm1 = XEXP (rs1, 1);
4568 rs1 = XEXP (rs1, 0);
4569 if (!CONSTANT_P (imm1)
4570 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4571 return 0;
4574 else if (GET_CODE (addr) == LO_SUM)
4576 rs1 = XEXP (addr, 0);
4577 imm1 = XEXP (addr, 1);
4579 if (!CONSTANT_P (imm1)
4580 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4581 return 0;
4583 /* We can't allow TFmode in 32-bit mode, because an offset greater
4584 than the alignment (8) may cause the LO_SUM to overflow. */
4585 if (mode == TFmode && TARGET_ARCH32)
4586 return 0;
4588 /* During reload, accept the HIGH+LO_SUM construct generated by
4589 sparc_legitimize_reload_address. */
4590 if (reload_in_progress
4591 && GET_CODE (rs1) == HIGH
4592 && XEXP (rs1, 0) == imm1)
4593 return 1;
4595 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4596 return 1;
4597 else
4598 return 0;
4600 if (GET_CODE (rs1) == SUBREG)
4601 rs1 = SUBREG_REG (rs1);
4602 if (!REG_P (rs1))
4603 return 0;
4605 if (rs2)
4607 if (GET_CODE (rs2) == SUBREG)
4608 rs2 = SUBREG_REG (rs2);
4609 if (!REG_P (rs2))
4610 return 0;
4613 if (strict)
4615 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4616 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4617 return 0;
4619 else
4621 if ((! SPARC_INT_REG_P (REGNO (rs1))
4622 && REGNO (rs1) != FRAME_POINTER_REGNUM
4623 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4624 || (rs2
4625 && (! SPARC_INT_REG_P (REGNO (rs2))
4626 && REGNO (rs2) != FRAME_POINTER_REGNUM
4627 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4628 return 0;
4630 return 1;
4633 /* Return the SYMBOL_REF for the tls_get_addr function. */
4635 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4637 static rtx
4638 sparc_tls_get_addr (void)
4640 if (!sparc_tls_symbol)
4641 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4643 return sparc_tls_symbol;
4646 /* Return the Global Offset Table to be used in TLS mode. */
4648 static rtx
4649 sparc_tls_got (void)
4651 /* In PIC mode, this is just the PIC offset table. */
4652 if (flag_pic)
4654 crtl->uses_pic_offset_table = 1;
4655 return pic_offset_table_rtx;
4658 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4659 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4660 if (TARGET_SUN_TLS && TARGET_ARCH32)
4662 load_got_register ();
4663 return got_register_rtx;
4666 /* In all other cases, we load a new pseudo with the GOT symbol. */
4667 return copy_to_reg (sparc_got ());
4670 /* Return true if X contains a thread-local symbol. */
4672 static bool
4673 sparc_tls_referenced_p (rtx x)
4675 if (!TARGET_HAVE_TLS)
4676 return false;
4678 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4679 x = XEXP (XEXP (x, 0), 0);
4681 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4682 return true;
4684 /* That's all we handle in sparc_legitimize_tls_address for now. */
4685 return false;
4688 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4689 this (thread-local) address. */
4691 static rtx
4692 sparc_legitimize_tls_address (rtx addr)
4694 rtx temp1, temp2, temp3, ret, o0, got;
4695 rtx_insn *insn;
4697 gcc_assert (can_create_pseudo_p ());
4699 if (GET_CODE (addr) == SYMBOL_REF)
4700 /* Although the various sethi/or sequences generate SImode values, many of
4701 them can be transformed by the linker when relaxing and, if relaxing to
4702 local-exec, will become a sethi/xor pair, which is signed and therefore
4703 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4704 values be spilled onto the stack in 64-bit mode. */
4705 switch (SYMBOL_REF_TLS_MODEL (addr))
4707 case TLS_MODEL_GLOBAL_DYNAMIC:
4708 start_sequence ();
4709 temp1 = gen_reg_rtx (Pmode);
4710 temp2 = gen_reg_rtx (Pmode);
4711 ret = gen_reg_rtx (Pmode);
4712 o0 = gen_rtx_REG (Pmode, 8);
4713 got = sparc_tls_got ();
4714 if (TARGET_ARCH32)
4716 emit_insn (gen_tgd_hi22si (temp1, addr));
4717 emit_insn (gen_tgd_lo10si (temp2, temp1, addr));
4718 emit_insn (gen_tgd_addsi (o0, got, temp2, addr));
4719 insn = emit_call_insn (gen_tgd_callsi (o0, sparc_tls_get_addr (),
4720 addr, const1_rtx));
4722 else
4724 emit_insn (gen_tgd_hi22di (temp1, addr));
4725 emit_insn (gen_tgd_lo10di (temp2, temp1, addr));
4726 emit_insn (gen_tgd_adddi (o0, got, temp2, addr));
4727 insn = emit_call_insn (gen_tgd_calldi (o0, sparc_tls_get_addr (),
4728 addr, const1_rtx));
4730 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4731 RTL_CONST_CALL_P (insn) = 1;
4732 insn = get_insns ();
4733 end_sequence ();
4734 emit_libcall_block (insn, ret, o0, addr);
4735 break;
4737 case TLS_MODEL_LOCAL_DYNAMIC:
4738 start_sequence ();
4739 temp1 = gen_reg_rtx (Pmode);
4740 temp2 = gen_reg_rtx (Pmode);
4741 temp3 = gen_reg_rtx (Pmode);
4742 ret = gen_reg_rtx (Pmode);
4743 o0 = gen_rtx_REG (Pmode, 8);
4744 got = sparc_tls_got ();
4745 if (TARGET_ARCH32)
4747 emit_insn (gen_tldm_hi22si (temp1));
4748 emit_insn (gen_tldm_lo10si (temp2, temp1));
4749 emit_insn (gen_tldm_addsi (o0, got, temp2));
4750 insn = emit_call_insn (gen_tldm_callsi (o0, sparc_tls_get_addr (),
4751 const1_rtx));
4753 else
4755 emit_insn (gen_tldm_hi22di (temp1));
4756 emit_insn (gen_tldm_lo10di (temp2, temp1));
4757 emit_insn (gen_tldm_adddi (o0, got, temp2));
4758 insn = emit_call_insn (gen_tldm_calldi (o0, sparc_tls_get_addr (),
4759 const1_rtx));
4761 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4762 RTL_CONST_CALL_P (insn) = 1;
4763 insn = get_insns ();
4764 end_sequence ();
4765 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4766 share the LD_BASE result with other LD model accesses. */
4767 emit_libcall_block (insn, temp3, o0,
4768 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4769 UNSPEC_TLSLD_BASE));
4770 temp1 = gen_reg_rtx (Pmode);
4771 temp2 = gen_reg_rtx (Pmode);
4772 if (TARGET_ARCH32)
4774 emit_insn (gen_tldo_hix22si (temp1, addr));
4775 emit_insn (gen_tldo_lox10si (temp2, temp1, addr));
4776 emit_insn (gen_tldo_addsi (ret, temp3, temp2, addr));
4778 else
4780 emit_insn (gen_tldo_hix22di (temp1, addr));
4781 emit_insn (gen_tldo_lox10di (temp2, temp1, addr));
4782 emit_insn (gen_tldo_adddi (ret, temp3, temp2, addr));
4784 break;
4786 case TLS_MODEL_INITIAL_EXEC:
4787 temp1 = gen_reg_rtx (Pmode);
4788 temp2 = gen_reg_rtx (Pmode);
4789 temp3 = gen_reg_rtx (Pmode);
4790 got = sparc_tls_got ();
4791 if (TARGET_ARCH32)
4793 emit_insn (gen_tie_hi22si (temp1, addr));
4794 emit_insn (gen_tie_lo10si (temp2, temp1, addr));
4795 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4797 else
4799 emit_insn (gen_tie_hi22di (temp1, addr));
4800 emit_insn (gen_tie_lo10di (temp2, temp1, addr));
4801 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4803 if (TARGET_SUN_TLS)
4805 ret = gen_reg_rtx (Pmode);
4806 if (TARGET_ARCH32)
4807 emit_insn (gen_tie_addsi (ret, gen_rtx_REG (Pmode, 7),
4808 temp3, addr));
4809 else
4810 emit_insn (gen_tie_adddi (ret, gen_rtx_REG (Pmode, 7),
4811 temp3, addr));
4813 else
4814 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4815 break;
4817 case TLS_MODEL_LOCAL_EXEC:
4818 temp1 = gen_reg_rtx (Pmode);
4819 temp2 = gen_reg_rtx (Pmode);
4820 if (TARGET_ARCH32)
4822 emit_insn (gen_tle_hix22si (temp1, addr));
4823 emit_insn (gen_tle_lox10si (temp2, temp1, addr));
4825 else
4827 emit_insn (gen_tle_hix22di (temp1, addr));
4828 emit_insn (gen_tle_lox10di (temp2, temp1, addr));
4830 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4831 break;
4833 default:
4834 gcc_unreachable ();
4837 else if (GET_CODE (addr) == CONST)
4839 rtx base, offset;
4841 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4843 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4844 offset = XEXP (XEXP (addr, 0), 1);
4846 base = force_operand (base, NULL_RTX);
4847 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4848 offset = force_reg (Pmode, offset);
4849 ret = gen_rtx_PLUS (Pmode, base, offset);
4852 else
4853 gcc_unreachable (); /* for now ... */
4855 return ret;
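/* Illustrative output for the local-exec model on 32-bit: the sequence
   built above assembles roughly to

     sethi  %tle_hix22(t), %g1
     xor    %g1, %tle_lox10(t), %g1
     add    %g7, %g1, %o0      ! %g7 is the thread pointer

   (sketch; register choices are up to the allocator).  */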
4858 /* Legitimize PIC addresses. If the address is already position-independent,
4859 we return ORIG. Newly generated position-independent addresses go into a
4860 reg. This is REG if nonzero, otherwise we allocate register(s) as
4861 necessary. */
4863 static rtx
4864 sparc_legitimize_pic_address (rtx orig, rtx reg)
4866 if (GET_CODE (orig) == SYMBOL_REF
4867 /* See the comment in sparc_expand_move. */
4868 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4870 bool gotdata_op = false;
4871 rtx pic_ref, address;
4872 rtx_insn *insn;
4874 if (!reg)
4876 gcc_assert (can_create_pseudo_p ());
4877 reg = gen_reg_rtx (Pmode);
4880 if (flag_pic == 2)
4882 /* If not during reload, allocate another temp reg here for loading
4883 in the address, so that these instructions can be optimized
4884 properly. */
4885 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4887 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4888 won't get confused into thinking that these two instructions
4889 are loading in the true address of the symbol. If in the
4890 future a PIC rtx exists, that should be used instead. */
4891 if (TARGET_ARCH64)
4893 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4894 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4896 else
4898 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4899 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4902 address = temp_reg;
4903 gotdata_op = true;
4905 else
4906 address = orig;
4908 crtl->uses_pic_offset_table = 1;
4909 if (gotdata_op)
4911 if (TARGET_ARCH64)
4912 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4913 pic_offset_table_rtx,
4914 address, orig));
4915 else
4916 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4917 pic_offset_table_rtx,
4918 address, orig));
4920 else
4922 pic_ref
4923 = gen_const_mem (Pmode,
4924 gen_rtx_PLUS (Pmode,
4925 pic_offset_table_rtx, address));
4926 insn = emit_move_insn (reg, pic_ref);
4929 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4930 by loop. */
4931 set_unique_reg_note (insn, REG_EQUAL, orig);
4932 return reg;
4934 else if (GET_CODE (orig) == CONST)
4936 rtx base, offset;
4938 if (GET_CODE (XEXP (orig, 0)) == PLUS
4939 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4940 return orig;
4942 if (!reg)
4944 gcc_assert (can_create_pseudo_p ());
4945 reg = gen_reg_rtx (Pmode);
4948 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4949 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4950 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4951 base == reg ? NULL_RTX : reg);
4953 if (GET_CODE (offset) == CONST_INT)
4955 if (SMALL_INT (offset))
4956 return plus_constant (Pmode, base, INTVAL (offset));
4957 else if (can_create_pseudo_p ())
4958 offset = force_reg (Pmode, offset);
4959 else
4960 /* If we reach here, then something is seriously wrong. */
4961 gcc_unreachable ();
4963 return gen_rtx_PLUS (Pmode, base, offset);
4965 else if (GET_CODE (orig) == LABEL_REF)
4966 /* ??? We ought to be checking that the register is live instead, in case
4967 it is eliminated. */
4968 crtl->uses_pic_offset_table = 1;
4970 return orig;
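/* Illustrative output: with flag_pic == 2, taking the address of a global
   'x' through the code above gives roughly

     sethi  %gdop_hix22(x), %g1
     xor    %g1, %gdop_lox10(x), %g1
     ld     [%l7 + %g1], %g1, %gdop(x)

   where the %gdop relocations let the linker later relax the GOT load
   (sketch; exact syntax depends on the assembler).  */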
4973 /* Try machine-dependent ways of modifying an illegitimate address X
4974 to be legitimate. If we find one, return the new, valid address.
4976 OLDX is the address as it was before break_out_memory_refs was called.
4977 In some cases it is useful to look at this to decide what needs to be done.
4979 MODE is the mode of the operand pointed to by X.
4981 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4983 static rtx
4984 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4985 machine_mode mode)
4987 rtx orig_x = x;
4989 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4990 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4991 force_operand (XEXP (x, 0), NULL_RTX));
4992 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4993 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4994 force_operand (XEXP (x, 1), NULL_RTX));
4995 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4996 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4997 XEXP (x, 1));
4998 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4999 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5000 force_operand (XEXP (x, 1), NULL_RTX));
5002 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
5003 return x;
5005 if (sparc_tls_referenced_p (x))
5006 x = sparc_legitimize_tls_address (x);
5007 else if (flag_pic)
5008 x = sparc_legitimize_pic_address (x, NULL_RTX);
5009 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
5010 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
5011 copy_to_mode_reg (Pmode, XEXP (x, 1)));
5012 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
5013 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
5014 copy_to_mode_reg (Pmode, XEXP (x, 0)));
5015 else if (GET_CODE (x) == SYMBOL_REF
5016 || GET_CODE (x) == CONST
5017 || GET_CODE (x) == LABEL_REF)
5018 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5020 return x;
5023 /* Delegitimize an address that was legitimized by the above function. */
5025 static rtx
5026 sparc_delegitimize_address (rtx x)
5028 x = delegitimize_mem_from_attrs (x);
5030 if (GET_CODE (x) == LO_SUM)
5031 x = XEXP (x, 1);
5033 if (GET_CODE (x) == UNSPEC)
5034 switch (XINT (x, 1))
5036 case UNSPEC_MOVE_PIC:
5037 case UNSPEC_TLSLE:
5038 x = XVECEXP (x, 0, 0);
5039 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5040 break;
5041 case UNSPEC_MOVE_GOTDATA:
5042 x = XVECEXP (x, 0, 2);
5043 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5044 break;
5045 default:
5046 break;
5049 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5050 if (GET_CODE (x) == MINUS
5051 && (XEXP (x, 0) == got_register_rtx
5052 || sparc_pic_register_p (XEXP (x, 0))))
5054 rtx y = XEXP (x, 1);
5056 if (GET_CODE (y) == LO_SUM)
5057 y = XEXP (y, 1);
5059 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5061 x = XVECEXP (y, 0, 0);
5062 gcc_assert (GET_CODE (x) == LABEL_REF
5063 || (GET_CODE (x) == CONST
5064 && GET_CODE (XEXP (x, 0)) == PLUS
5065 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5066 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5070 return x;
5073 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5074 replace the input X, or the original X if no replacement is called for.
5075 The output parameter *WIN is 1 if the calling macro should goto WIN,
5076 0 if it should not.
5078 For SPARC, we wish to handle addresses by splitting them into
5079 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5080 This cuts the number of extra insns by one.
5082 Do nothing when generating PIC code and the address is a symbolic
5083 operand or requires a scratch register. */
5086 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5087 int opnum, int type,
5088 int ind_levels ATTRIBUTE_UNUSED, int *win)
5090 /* Decompose SImode constants into HIGH+LO_SUM. */
5091 if (CONSTANT_P (x)
5092 && (mode != TFmode || TARGET_ARCH64)
5093 && GET_MODE (x) == SImode
5094 && GET_CODE (x) != LO_SUM
5095 && GET_CODE (x) != HIGH
5096 && sparc_code_model <= CM_MEDLOW
5097 && !(flag_pic
5098 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5100 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5101 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5102 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5103 opnum, (enum reload_type)type);
5104 *win = 1;
5105 return x;
5108 /* We have to recognize what we have already generated above. */
5109 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5111 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5112 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5113 opnum, (enum reload_type)type);
5114 *win = 1;
5115 return x;
5118 *win = 0;
5119 return x;
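/* Example: reloading a symbolic SImode address 'sym' under the non-PIC
   medlow model yields the HIGH+LO_SUM pair described above, i.e.

     sethi  %hi(sym), %g1
     st     %o0, [%g1 + %lo(sym)]

   keeping the LO_SUM inside the memory reference saves the extra add
   (illustrative sketch).  */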
5122 /* Return true if ADDR (a legitimate address expression)
5123 has an effect that depends on the machine mode it is used for.
5125 In PIC mode,
5127 (mem:HI [%l7+a])
5129 is not equivalent to
5131 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5133 because [%l7+a+1] is interpreted as the address of (a+1). */
5136 static bool
5137 sparc_mode_dependent_address_p (const_rtx addr,
5138 addr_space_t as ATTRIBUTE_UNUSED)
5140 if (GET_CODE (addr) == PLUS
5141 && sparc_pic_register_p (XEXP (addr, 0))
5142 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5143 return true;
5145 return false;
5148 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5149 address of the call target. */
5151 void
5152 sparc_emit_call_insn (rtx pat, rtx addr)
5154 rtx_insn *insn;
5156 insn = emit_call_insn (pat);
5158 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5159 if (TARGET_VXWORKS_RTP
5160 && flag_pic
5161 && GET_CODE (addr) == SYMBOL_REF
5162 && (SYMBOL_REF_DECL (addr)
5163 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5164 : !SYMBOL_REF_LOCAL_P (addr)))
5166 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5167 crtl->uses_pic_offset_table = 1;
5171 /* Return 1 if RTX is a MEM which is known to be aligned to at
5172 least a DESIRED byte boundary. */
5175 mem_min_alignment (rtx mem, int desired)
5177 rtx addr, base, offset;
5179 /* If it's not a MEM we can't accept it. */
5180 if (GET_CODE (mem) != MEM)
5181 return 0;
5183 /* Obviously... */
5184 if (!TARGET_UNALIGNED_DOUBLES
5185 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5186 return 1;
5188 /* ??? The rest of the function predates MEM_ALIGN so
5189 there is probably a bit of redundancy. */
5190 addr = XEXP (mem, 0);
5191 base = offset = NULL_RTX;
5192 if (GET_CODE (addr) == PLUS)
5194 if (GET_CODE (XEXP (addr, 0)) == REG)
5196 base = XEXP (addr, 0);
5198 /* What we are saying here is that if the base
5199 REG is aligned properly, the compiler will make
5200 sure any REG-based index upon it will be so
5201 as well. */
5202 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5203 offset = XEXP (addr, 1);
5204 else
5205 offset = const0_rtx;
5208 else if (GET_CODE (addr) == REG)
5210 base = addr;
5211 offset = const0_rtx;
5214 if (base != NULL_RTX)
5216 int regno = REGNO (base);
5218 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5220 /* Check if the compiler has recorded some information
5221 about the alignment of the base REG. If reload has
5222 completed, we already matched with proper alignments.
5223 If not running global_alloc, reload might give us
5224 unaligned pointer to local stack though. */
5225 if (((cfun != 0
5226 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5227 || (optimize && reload_completed))
5228 && (INTVAL (offset) & (desired - 1)) == 0)
5229 return 1;
5231 else
5233 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5234 return 1;
5237 else if (! TARGET_UNALIGNED_DOUBLES
5238 || CONSTANT_P (addr)
5239 || GET_CODE (addr) == LO_SUM)
5241 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5242 is true, in which case we can only assume that an access is aligned if
5243 it is to a constant address, or the address involves a LO_SUM. */
5244 return 1;
5247 /* An obviously unaligned address. */
5248 return 0;
5252 /* Vectors to keep interesting information about registers where it can easily
5253 be looked up. We used to use the actual mode value as the bit number, but there
5254 are more than 32 modes now. Instead we use two tables: one indexed by
5255 hard register number, and one indexed by mode. */
5257 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5258 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5259 mapped into one sparc_mode_class mode. */
5261 enum sparc_mode_class {
5262 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5263 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5264 CC_MODE, CCFP_MODE
5267 /* Modes for single-word and smaller quantities. */
5268 #define S_MODES \
5269 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5271 /* Modes for double-word and smaller quantities. */
5272 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5274 /* Modes for quad-word and smaller quantities. */
5275 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5277 /* Modes for 8-word and smaller quantities. */
5278 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5280 /* Modes for single-float quantities. */
5281 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5283 /* Modes for double-float and smaller quantities. */
5284 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5286 /* Modes for quad-float and smaller quantities. */
5287 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5289 /* Modes for quad-float pairs and smaller quantities. */
5290 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5292 /* Modes for double-float only quantities. */
5293 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5295 /* Modes for quad-float and double-float only quantities. */
5296 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5298 /* Modes for quad-float pairs and double-float only quantities. */
5299 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5301 /* Modes for condition codes. */
5302 #define CC_MODES (1 << (int) CC_MODE)
5303 #define CCFP_MODES (1 << (int) CCFP_MODE)
5305 /* Value is 1 if register/mode pair is acceptable on sparc.
5307 The funny mixture of D and T modes is because integer operations
5308 do not specially operate on tetra quantities, so non-quad-aligned
5309 registers can hold quadword quantities (except %o4 and %i4 because
5310 they cross fixed registers).
5312 ??? Note that, despite the settings, non-double-aligned parameter
5313 registers can hold double-word quantities in 32-bit mode. */
5315 /* This points to either the 32-bit or the 64-bit version. */
5316 static const int *hard_regno_mode_classes;
5318 static const int hard_32bit_mode_classes[] = {
5319 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5320 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5321 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5322 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5324 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5325 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5326 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5327 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5329 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5330 and none can hold SFmode/SImode values. */
5331 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5332 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5333 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5334 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5336 /* %fcc[0123] */
5337 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5339 /* %icc, %sfp, %gsr */
5340 CC_MODES, 0, D_MODES
5343 static const int hard_64bit_mode_classes[] = {
5344 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5345 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5346 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5347 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5349 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5350 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5351 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5352 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5354 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5355 and none can hold SFmode/SImode values. */
5356 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5357 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5358 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5359 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5361 /* %fcc[0123] */
5362 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5364 /* %icc, %sfp, %gsr */
5365 CC_MODES, 0, D_MODES
5368 static int sparc_mode_class [NUM_MACHINE_MODES];
5370 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5372 static void
5373 sparc_init_modes (void)
5375 int i;
5377 for (i = 0; i < NUM_MACHINE_MODES; i++)
5379 machine_mode m = (machine_mode) i;
5380 unsigned int size = GET_MODE_SIZE (m);
5382 switch (GET_MODE_CLASS (m))
5384 case MODE_INT:
5385 case MODE_PARTIAL_INT:
5386 case MODE_COMPLEX_INT:
5387 if (size < 4)
5388 sparc_mode_class[i] = 1 << (int) H_MODE;
5389 else if (size == 4)
5390 sparc_mode_class[i] = 1 << (int) S_MODE;
5391 else if (size == 8)
5392 sparc_mode_class[i] = 1 << (int) D_MODE;
5393 else if (size == 16)
5394 sparc_mode_class[i] = 1 << (int) T_MODE;
5395 else if (size == 32)
5396 sparc_mode_class[i] = 1 << (int) O_MODE;
5397 else
5398 sparc_mode_class[i] = 0;
5399 break;
5400 case MODE_VECTOR_INT:
5401 if (size == 4)
5402 sparc_mode_class[i] = 1 << (int) SF_MODE;
5403 else if (size == 8)
5404 sparc_mode_class[i] = 1 << (int) DF_MODE;
5405 else
5406 sparc_mode_class[i] = 0;
5407 break;
5408 case MODE_FLOAT:
5409 case MODE_COMPLEX_FLOAT:
5410 if (size == 4)
5411 sparc_mode_class[i] = 1 << (int) SF_MODE;
5412 else if (size == 8)
5413 sparc_mode_class[i] = 1 << (int) DF_MODE;
5414 else if (size == 16)
5415 sparc_mode_class[i] = 1 << (int) TF_MODE;
5416 else if (size == 32)
5417 sparc_mode_class[i] = 1 << (int) OF_MODE;
5418 else
5419 sparc_mode_class[i] = 0;
5420 break;
5421 case MODE_CC:
5422 if (m == CCFPmode || m == CCFPEmode)
5423 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5424 else
5425 sparc_mode_class[i] = 1 << (int) CC_MODE;
5426 break;
5427 default:
5428 sparc_mode_class[i] = 0;
5429 break;
5433 if (TARGET_ARCH64)
5434 hard_regno_mode_classes = hard_64bit_mode_classes;
5435 else
5436 hard_regno_mode_classes = hard_32bit_mode_classes;
5438 /* Initialize the array used by REGNO_REG_CLASS. */
5439 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5441 if (i < 16 && TARGET_V8PLUS)
5442 sparc_regno_reg_class[i] = I64_REGS;
5443 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5444 sparc_regno_reg_class[i] = GENERAL_REGS;
5445 else if (i < 64)
5446 sparc_regno_reg_class[i] = FP_REGS;
5447 else if (i < 96)
5448 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5449 else if (i < 100)
5450 sparc_regno_reg_class[i] = FPCC_REGS;
5451 else
5452 sparc_regno_reg_class[i] = NO_REGS;
5456 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5458 static inline bool
5459 save_global_or_fp_reg_p (unsigned int regno,
5460 int leaf_function ATTRIBUTE_UNUSED)
5462 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5465 /* Return whether the return address register (%i7) is needed. */
5467 static inline bool
5468 return_addr_reg_needed_p (int leaf_function)
5470 /* If it is live, for example because of __builtin_return_address (0). */
5471 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5472 return true;
5474 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5475 if (!leaf_function
5476 /* Loading the GOT register clobbers %o7. */
5477 || crtl->uses_pic_offset_table
5478 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5479 return true;
5481 return false;
5484 /* Return whether REGNO, a local or in register, must be saved/restored. */
5486 static bool
5487 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5489 /* General case: call-saved registers live at some point. */
5490 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5491 return true;
5493 /* Frame pointer register (%fp) if needed. */
5494 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5495 return true;
5497 /* Return address register (%i7) if needed. */
5498 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5499 return true;
5501 /* GOT register (%l7) if needed. */
5502 if (got_register_rtx && regno == REGNO (got_register_rtx))
5503 return true;
5505 /* If the function accesses prior frames, the frame pointer and the return
5506 address of the previous frame must be saved on the stack. */
5507 if (crtl->accesses_prior_frames
5508 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5509 return true;
5511 return false;
5514 /* Compute the frame size required by the function. This function is called
5515 during the reload pass and also by sparc_expand_prologue. */
5517 static HOST_WIDE_INT
5518 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5520 HOST_WIDE_INT frame_size, apparent_frame_size;
5521 int args_size, n_global_fp_regs = 0;
5522 bool save_local_in_regs_p = false;
5523 unsigned int i;
5525 /* If the function allocates dynamic stack space, the dynamic offset is
5526 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5527 if (leaf_function && !cfun->calls_alloca)
5528 args_size = 0;
5529 else
5530 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5532 /* Calculate space needed for global registers. */
5533 if (TARGET_ARCH64)
5535 for (i = 0; i < 8; i++)
5536 if (save_global_or_fp_reg_p (i, 0))
5537 n_global_fp_regs += 2;
5539 else
5541 for (i = 0; i < 8; i += 2)
5542 if (save_global_or_fp_reg_p (i, 0)
5543 || save_global_or_fp_reg_p (i + 1, 0))
5544 n_global_fp_regs += 2;
5547 /* In the flat window model, find out which local and in registers need to
5548 be saved. We don't reserve space in the current frame for them as they
5549 will be spilled into the register window save area of the caller's frame.
5550 However, as soon as we use this register window save area, we must create
5551 that of the current frame to make it the live one. */
5552 if (TARGET_FLAT)
5553 for (i = 16; i < 32; i++)
5554 if (save_local_or_in_reg_p (i, leaf_function))
5556 save_local_in_regs_p = true;
5557 break;
5560 /* Calculate space needed for FP registers. */
5561 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5562 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5563 n_global_fp_regs += 2;
5565 if (size == 0
5566 && n_global_fp_regs == 0
5567 && args_size == 0
5568 && !save_local_in_regs_p)
5569 frame_size = apparent_frame_size = 0;
5570 else
5572 /* Start from the apparent frame size. */
5573 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5575 /* We need to add the size of the outgoing argument area. */
5576 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5578 /* And that of the register window save area. */
5579 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5581 /* Finally, bump to the appropriate alignment. */
5582 frame_size = SPARC_STACK_ALIGN (frame_size);
5585 /* Set up values for use in prologue and epilogue. */
5586 sparc_frame_size = frame_size;
5587 sparc_apparent_frame_size = apparent_frame_size;
5588 sparc_n_global_fp_regs = n_global_fp_regs;
5589 sparc_save_local_in_regs_p = save_local_in_regs_p;
5591 return frame_size;
5594 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5597 sparc_initial_elimination_offset (int to)
5599 int offset;
5601 if (to == STACK_POINTER_REGNUM)
5602 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5603 else
5604 offset = 0;
5606 offset += SPARC_STACK_BIAS;
5607 return offset;
5610 /* Output any necessary .register pseudo-ops. */
5612 void
5613 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5615 int i;
5617 if (TARGET_ARCH32)
5618 return;
5620 /* Check if %g[2367] were used without
5621 .register being printed for them already. */
5622 for (i = 2; i < 8; i++)
5624 if (df_regs_ever_live_p (i)
5625 && ! sparc_hard_reg_printed [i])
5627 sparc_hard_reg_printed [i] = 1;
5628 /* %g7 is used as TLS base register, use #ignore
5629 for it instead of #scratch. */
5630 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5631 i == 7 ? "ignore" : "scratch");
5633 if (i == 3) i = 5;
5637 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5639 #if PROBE_INTERVAL > 4096
5640 #error Cannot use indexed addressing mode for stack probing
5641 #endif
5643 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5644 inclusive. These are offsets from the current stack pointer.
5646 Note that we don't use the REG+REG addressing mode for the probes because
5647 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5648 so the advantages of having a single code path win here. */
5650 static void
5651 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5653 rtx g1 = gen_rtx_REG (Pmode, 1);
5655 /* See if we have a constant small number of probes to generate. If so,
5656 that's the easy case. */
5657 if (size <= PROBE_INTERVAL)
5659 emit_move_insn (g1, GEN_INT (first));
5660 emit_insn (gen_rtx_SET (g1,
5661 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5662 emit_stack_probe (plus_constant (Pmode, g1, -size));
5665 /* The run-time loop is made up of 9 insns in the generic case while the
5666 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
5667 else if (size <= 4 * PROBE_INTERVAL)
5669 HOST_WIDE_INT i;
5671 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5672 emit_insn (gen_rtx_SET (g1,
5673 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5674 emit_stack_probe (g1);
5676 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5677 it exceeds SIZE. If only two probes are needed, this will not
5678 generate any code. Then probe at FIRST + SIZE. */
5679 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5681 emit_insn (gen_rtx_SET (g1,
5682 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5683 emit_stack_probe (g1);
5686 emit_stack_probe (plus_constant (Pmode, g1,
5687 (i - PROBE_INTERVAL) - size));
5690 /* Otherwise, do the same as above, but in a loop. Note that we must be
5691 extra careful with variables wrapping around because we might be at
5692 the very top (or the very bottom) of the address space and we have
5693 to be able to handle this case properly; in particular, we use an
5694 equality test for the loop condition. */
5695 else
5697 HOST_WIDE_INT rounded_size;
5698 rtx g4 = gen_rtx_REG (Pmode, 4);
5700 emit_move_insn (g1, GEN_INT (first));
5703 /* Step 1: round SIZE to the previous multiple of the interval. */
5705 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5706 emit_move_insn (g4, GEN_INT (rounded_size));
5709 /* Step 2: compute initial and final value of the loop counter. */
5711 /* TEST_ADDR = SP + FIRST. */
5712 emit_insn (gen_rtx_SET (g1,
5713 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5715 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5716 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5719 /* Step 3: the loop
5721 while (TEST_ADDR != LAST_ADDR)
5723 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5724 probe at TEST_ADDR
5727 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5728 until it is equal to ROUNDED_SIZE. */
5730 if (TARGET_ARCH64)
5731 emit_insn (gen_probe_stack_rangedi (g1, g1, g4));
5732 else
5733 emit_insn (gen_probe_stack_rangesi (g1, g1, g4));
5736 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5737 that SIZE is equal to ROUNDED_SIZE. */
5739 if (size != rounded_size)
5740 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5743 /* Make sure nothing is scheduled before we are done. */
5744 emit_insn (gen_blockage ());
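/* Worked example: with PROBE_INTERVAL == 4096, FIRST == 0 and SIZE == 9000,
   the unrolled branch above emits probes at SP - 4096, SP - 8192 and
   finally SP - 9000, so consecutive probes are never more than one
   interval apart (hypothetical parameter values for illustration).  */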
5747 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5748 absolute addresses. */
5750 const char *
5751 output_probe_stack_range (rtx reg1, rtx reg2)
5753 static int labelno = 0;
5754 char loop_lab[32];
5755 rtx xops[2];
5757 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5759 /* Loop. */
5760 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5762 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5763 xops[0] = reg1;
5764 xops[1] = GEN_INT (-PROBE_INTERVAL);
5765 output_asm_insn ("add\t%0, %1, %0", xops);
5767 /* Test if TEST_ADDR == LAST_ADDR. */
5768 xops[1] = reg2;
5769 output_asm_insn ("cmp\t%0, %1", xops);
5771 /* Probe at TEST_ADDR and branch. */
5772 if (TARGET_ARCH64)
5773 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5774 else
5775 fputs ("\tbne\t", asm_out_file);
5776 assemble_name_raw (asm_out_file, loop_lab);
5777 fputc ('\n', asm_out_file);
5778 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5779 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5781 return "";
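/* For reference, a sketch (not authoritative) of the 64-bit loop emitted
   above, with REG1 = %g1 and REG2 = %g4 as passed in by
   sparc_emit_probe_stack_range and PROBE_INTERVAL == 4096:

   .LPSRL0:
	add	%g1, -4096, %g1		! TEST_ADDR -= PROBE_INTERVAL
	cmp	%g1, %g4		! reached LAST_ADDR yet?
	bne,pt	%xcc, .LPSRL0
	 st	%g0, [%g1+2047]		! probe in the delay slot
					! (2047 = 64-bit stack bias)  */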
5784 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5785 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5786 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5787 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5788 the action to be performed if it returns false. Return the new offset. */
5790 typedef bool (*sorr_pred_t) (unsigned int, int);
5791 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5793 static int
5794 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5795 int offset, int leaf_function, sorr_pred_t save_p,
5796 sorr_act_t action_true, sorr_act_t action_false)
5798 unsigned int i;
5799 rtx mem;
5800 rtx_insn *insn;
5802 if (TARGET_ARCH64 && high <= 32)
5804 int fp_offset = -1;
5806 for (i = low; i < high; i++)
5808 if (save_p (i, leaf_function))
5810 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5811 base, offset));
5812 if (action_true == SORR_SAVE)
5814 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5815 RTX_FRAME_RELATED_P (insn) = 1;
5817 else /* action_true == SORR_RESTORE */
5819 /* The frame pointer must be restored last since its old
5820 value may be used as the base address for the frame. This
5821 is problematic in 64-bit mode only because of the lack
5822 of a double-word load instruction. */
5823 if (i == HARD_FRAME_POINTER_REGNUM)
5824 fp_offset = offset;
5825 else
5826 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5828 offset += 8;
5830 else if (action_false == SORR_ADVANCE)
5831 offset += 8;
5834 if (fp_offset >= 0)
5836 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5837 emit_move_insn (hard_frame_pointer_rtx, mem);
5840 else
5842 for (i = low; i < high; i += 2)
5844 bool reg0 = save_p (i, leaf_function);
5845 bool reg1 = save_p (i + 1, leaf_function);
5846 machine_mode mode;
5847 int regno;
5849 if (reg0 && reg1)
5851 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5852 regno = i;
5854 else if (reg0)
5856 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5857 regno = i;
5859 else if (reg1)
5861 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5862 regno = i + 1;
5863 offset += 4;
5865 else
5867 if (action_false == SORR_ADVANCE)
5868 offset += 8;
5869 continue;
5872 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5873 if (action_true == SORR_SAVE)
5875 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5876 RTX_FRAME_RELATED_P (insn) = 1;
5877 if (mode == DImode)
5879 rtx set1, set2;
5880 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5881 offset));
5882 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5883 RTX_FRAME_RELATED_P (set1) = 1;
5884 mem
5885 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5886 offset + 4));
5887 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5888 RTX_FRAME_RELATED_P (set2) = 1;
5889 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5890 gen_rtx_PARALLEL (VOIDmode,
5891 gen_rtvec (2, set1, set2)));
5894 else /* action_true == SORR_RESTORE */
5895 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5897 /* Bump and round down to double word
5898 in case we already bumped by 4. */
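/* Worked example (added for illustration): if only the second register
   of a pair was live, OFFSET was bumped by 4 above, e.g. 16 -> 20; the
   statement below then yields ROUND_DOWN (28, 8) == 24, i.e. the next
   double-word boundary.  */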
5899 offset = ROUND_DOWN (offset + 8, 8);
5903 return offset;
5906 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5908 static rtx
5909 emit_adjust_base_to_offset (rtx base, int offset)
5911 /* ??? This might be optimized a little as %g1 might already have a
5912 value close enough that a single add insn will do. */
5913 /* ??? Although, all of this is probably only a temporary fix because
5914 if %g1 can hold a function result, then sparc_expand_epilogue will
5915 lose (the result will be clobbered). */
5916 rtx new_base = gen_rtx_REG (Pmode, 1);
5917 emit_move_insn (new_base, GEN_INT (offset));
5918 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5919 return new_base;
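/* Background note (added for clarity): the callers below only resort to
   this when OFFSET falls outside [-4096, 4095], the range of the 13-bit
   signed immediate that SPARC load/store displacements and `add' can
   encode directly.  */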
5922 /* Emit code to save/restore call-saved global and FP registers. */
5924 static void
5925 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5927 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5929 base = emit_adjust_base_to_offset (base, offset);
5930 offset = 0;
5933 offset
5934 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5935 save_global_or_fp_reg_p, action, SORR_NONE);
5936 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5937 save_global_or_fp_reg_p, action, SORR_NONE);
5940 /* Emit code to save/restore call-saved local and in registers. */
5942 static void
5943 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5945 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5947 base = emit_adjust_base_to_offset (base, offset);
5948 offset = 0;
5951 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5952 save_local_or_in_reg_p, action, SORR_ADVANCE);
5955 /* Emit a window_save insn. */
5957 static rtx_insn *
5958 emit_window_save (rtx increment)
5960 rtx_insn *insn = emit_insn (gen_window_save (increment));
5961 RTX_FRAME_RELATED_P (insn) = 1;
5963 /* The incoming return address (%o7) is saved in %i7. */
5964 add_reg_note (insn, REG_CFA_REGISTER,
5965 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5966 gen_rtx_REG (Pmode,
5967 INCOMING_RETURN_ADDR_REGNUM)));
5969 /* The window save event. */
5970 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5972 /* The CFA is %fp, the hard frame pointer. */
5973 add_reg_note (insn, REG_CFA_DEF_CFA,
5974 plus_constant (Pmode, hard_frame_pointer_rtx,
5975 INCOMING_FRAME_SP_OFFSET));
5977 return insn;
5980 /* Generate an increment for the stack pointer. */
5982 static rtx
5983 gen_stack_pointer_inc (rtx increment)
5985 return gen_rtx_SET (stack_pointer_rtx,
5986 gen_rtx_PLUS (Pmode,
5987 stack_pointer_rtx,
5988 increment));
5991 /* Expand the function prologue. The prologue is responsible for reserving
5992 storage for the frame, saving the call-saved registers and loading the
5993 GOT register if needed. */
5995 void
5996 sparc_expand_prologue (void)
5998 HOST_WIDE_INT size;
5999 rtx_insn *insn;
6001 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
6002 on the final value of the flag means deferring the prologue/epilogue
6003 expansion until just before the second scheduling pass, which is too
6004 late to emit multiple epilogues or return insns.
6006 Of course we are making the assumption that the value of the flag
6007 will not change between now and its final value. Of the three parts
6008 of the formula, only the last one can reasonably vary. Let's take a
6009 closer look, after assuming that the first two are set to true
6010 (otherwise the last value is effectively silenced).
6012 If only_leaf_regs_used returns false, the global predicate will also
6013 be false so the actual frame size calculated below will be positive.
6014 As a consequence, the save_register_window insn will be emitted in
6015 the instruction stream; now this insn explicitly references %fp
6016 which is not a leaf register so only_leaf_regs_used will always
6017 return false subsequently.
6019 If only_leaf_regs_used returns true, we hope that the subsequent
6020 optimization passes won't cause non-leaf registers to pop up. For
6021 example, the regrename pass has special provisions to not rename to
6022 non-leaf registers in a leaf function. */
6023 sparc_leaf_function_p
6024 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6026 size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
6028 if (flag_stack_usage_info)
6029 current_function_static_stack_size = size;
6031 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6032 || flag_stack_clash_protection)
6034 if (crtl->is_leaf && !cfun->calls_alloca)
6036 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6037 sparc_emit_probe_stack_range (get_stack_check_protect (),
6038 size - get_stack_check_protect ());
6040 else if (size > 0)
6041 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6044 if (size == 0)
6045 ; /* do nothing. */
6046 else if (sparc_leaf_function_p)
6048 rtx size_int_rtx = GEN_INT (-size);
6050 if (size <= 4096)
6051 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6052 else if (size <= 8192)
6054 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6055 RTX_FRAME_RELATED_P (insn) = 1;
6057 /* %sp is still the CFA register. */
6058 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6060 else
6062 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6063 emit_move_insn (size_rtx, size_int_rtx);
6064 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6065 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6066 gen_stack_pointer_inc (size_int_rtx));
6069 RTX_FRAME_RELATED_P (insn) = 1;
6071 else
6073 rtx size_int_rtx = GEN_INT (-size);
6075 if (size <= 4096)
6076 emit_window_save (size_int_rtx);
6077 else if (size <= 8192)
6079 emit_window_save (GEN_INT (-4096));
6081 /* %sp is not the CFA register anymore. */
6082 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6084 /* Make sure no %fp-based store is issued until after the frame is
6085 established. The offset between the frame pointer and the stack
6086 pointer is calculated relative to the value of the stack pointer
6087 at the end of the function prologue, and moving instructions that
6088 access the stack via the frame pointer between the instructions
6089 that decrement the stack pointer could result in accessing the
6090 register window save area, which is volatile. */
6091 emit_insn (gen_frame_blockage ());
6093 else
6095 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6096 emit_move_insn (size_rtx, size_int_rtx);
6097 emit_window_save (size_rtx);
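/* Note added for clarity: the 4096/8192 thresholds above stem from the
   13-bit signed immediate of SPARC arithmetic instructions: -SIZE fits
   directly up to 4096, two adds of at most -4096 each cover up to 8192,
   and anything larger must go through the %g1 scratch register.  */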
6101 if (sparc_leaf_function_p)
6103 sparc_frame_base_reg = stack_pointer_rtx;
6104 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6106 else
6108 sparc_frame_base_reg = hard_frame_pointer_rtx;
6109 sparc_frame_base_offset = SPARC_STACK_BIAS;
6112 if (sparc_n_global_fp_regs > 0)
6113 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6114 sparc_frame_base_offset
6115 - sparc_apparent_frame_size,
6116 SORR_SAVE);
6118 /* Advertise that the data calculated just above are now valid. */
6119 sparc_prologue_data_valid_p = true;
6122 /* Expand the function prologue for the flat register window model. The
6123 prologue is responsible for reserving storage for the frame, saving the
6124 call-saved registers and loading the GOT register if needed. */
6126 void
6127 sparc_flat_expand_prologue (void)
6129 HOST_WIDE_INT size;
6130 rtx_insn *insn;
6132 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6134 size = sparc_compute_frame_size (get_frame_size (), sparc_leaf_function_p);
6136 if (flag_stack_usage_info)
6137 current_function_static_stack_size = size;
6139 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6140 || flag_stack_clash_protection)
6142 if (crtl->is_leaf && !cfun->calls_alloca)
6144 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6145 sparc_emit_probe_stack_range (get_stack_check_protect (),
6146 size - get_stack_check_protect ());
6148 else if (size > 0)
6149 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6152 if (sparc_save_local_in_regs_p)
6153 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6154 SORR_SAVE);
6156 if (size == 0)
6157 ; /* do nothing. */
6158 else
6160 rtx size_int_rtx, size_rtx;
6162 size_rtx = size_int_rtx = GEN_INT (-size);
6164 /* We establish the frame (i.e. decrement the stack pointer) first, even
6165 if we use a frame pointer, because we cannot clobber any call-saved
6166 registers, including the frame pointer, if we haven't created a new
6167 register save area, for the sake of compatibility with the ABI. */
6168 if (size <= 4096)
6169 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6170 else if (size <= 8192 && !frame_pointer_needed)
6172 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6173 RTX_FRAME_RELATED_P (insn) = 1;
6174 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6176 else
6178 size_rtx = gen_rtx_REG (Pmode, 1);
6179 emit_move_insn (size_rtx, size_int_rtx);
6180 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6181 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6182 gen_stack_pointer_inc (size_int_rtx));
6184 RTX_FRAME_RELATED_P (insn) = 1;
6186 /* Ensure nothing is scheduled until after the frame is established. */
6187 emit_insn (gen_blockage ());
6189 if (frame_pointer_needed)
6191 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6192 gen_rtx_MINUS (Pmode,
6193 stack_pointer_rtx,
6194 size_rtx)));
6195 RTX_FRAME_RELATED_P (insn) = 1;
6197 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6198 gen_rtx_SET (hard_frame_pointer_rtx,
6199 plus_constant (Pmode, stack_pointer_rtx,
6200 size)));
6203 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6205 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6206 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6208 insn = emit_move_insn (i7, o7);
6209 RTX_FRAME_RELATED_P (insn) = 1;
6211 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6213 /* Prevent this instruction from ever being considered dead,
6214 even if this function has no epilogue. */
6215 emit_use (i7);
6219 if (frame_pointer_needed)
6221 sparc_frame_base_reg = hard_frame_pointer_rtx;
6222 sparc_frame_base_offset = SPARC_STACK_BIAS;
6224 else
6226 sparc_frame_base_reg = stack_pointer_rtx;
6227 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6230 if (sparc_n_global_fp_regs > 0)
6231 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6232 sparc_frame_base_offset
6233 - sparc_apparent_frame_size,
6234 SORR_SAVE);
6236 /* Advertise that the data calculated just above are now valid. */
6237 sparc_prologue_data_valid_p = true;
6240 /* This function generates the assembly code for function entry, which boils
6241 down to emitting the necessary .register directives. */
6243 static void
6244 sparc_asm_function_prologue (FILE *file)
6246 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6247 if (!TARGET_FLAT)
6248 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6250 sparc_output_scratch_registers (file);
6253 /* Expand the function epilogue, either normal or part of a sibcall.
6254 We emit all the instructions except the return or the call. */
6256 void
6257 sparc_expand_epilogue (bool for_eh)
6259 HOST_WIDE_INT size = sparc_frame_size;
6261 if (cfun->calls_alloca)
6262 emit_insn (gen_frame_blockage ());
6264 if (sparc_n_global_fp_regs > 0)
6265 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6266 sparc_frame_base_offset
6267 - sparc_apparent_frame_size,
6268 SORR_RESTORE);
6270 if (size == 0 || for_eh)
6271 ; /* do nothing. */
6272 else if (sparc_leaf_function_p)
6274 if (size <= 4096)
6275 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6276 else if (size <= 8192)
6278 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6279 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6281 else
6283 rtx reg = gen_rtx_REG (Pmode, 1);
6284 emit_move_insn (reg, GEN_INT (size));
6285 emit_insn (gen_stack_pointer_inc (reg));
6290 /* Expand the function epilogue for the flat model, either normal or part
6291 of a sibcall. We emit all the instructions except the return or the call. */
6293 void
6294 sparc_flat_expand_epilogue (bool for_eh)
6296 HOST_WIDE_INT size = sparc_frame_size;
6298 if (sparc_n_global_fp_regs > 0)
6299 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6300 sparc_frame_base_offset
6301 - sparc_apparent_frame_size,
6302 SORR_RESTORE);
6304 /* If we have a frame pointer, we'll need both to restore it before the
6305 frame is destroyed and use its current value in destroying the frame.
6306 Since we don't have an atomic way to do that in the flat window model,
6307 we save the current value into a temporary register (%g1). */
6308 if (frame_pointer_needed && !for_eh)
6309 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6311 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6312 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6313 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6315 if (sparc_save_local_in_regs_p)
6316 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6317 sparc_frame_base_offset,
6318 SORR_RESTORE);
6320 if (size == 0 || for_eh)
6321 ; /* do nothing. */
6322 else if (frame_pointer_needed)
6324 /* Make sure the frame is destroyed after everything else is done. */
6325 emit_insn (gen_blockage ());
6327 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6329 else
6331 /* Likewise. */
6332 emit_insn (gen_blockage ());
6334 if (size <= 4096)
6335 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6336 else if (size <= 8192)
6338 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6339 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6341 else
6343 rtx reg = gen_rtx_REG (Pmode, 1);
6344 emit_move_insn (reg, GEN_INT (size));
6345 emit_insn (gen_stack_pointer_inc (reg));
6350 /* Return true if it is appropriate to emit `return' instructions in the
6351 body of a function. */
6353 bool
6354 sparc_can_use_return_insn_p (void)
6356 return sparc_prologue_data_valid_p
6357 && sparc_n_global_fp_regs == 0
6358 && TARGET_FLAT
6359 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6360 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6363 /* This function generates the assembly code for function exit. */
6365 static void
6366 sparc_asm_function_epilogue (FILE *file)
6368 /* If the last two instructions of a function are "call foo; dslot;"
6369 the return address might point to the first instruction in the next
6370 function and we have to output a dummy nop for the sake of sane
6371 backtraces in such cases. This is pointless for sibling calls since
6372 the return address is explicitly adjusted. */
6374 rtx_insn *insn = get_last_insn ();
6376 rtx last_real_insn = prev_real_insn (insn);
6377 if (last_real_insn
6378 && NONJUMP_INSN_P (last_real_insn)
6379 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6380 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6382 if (last_real_insn
6383 && CALL_P (last_real_insn)
6384 && !SIBLING_CALL_P (last_real_insn))
6385 fputs ("\tnop\n", file);
6387 sparc_output_deferred_case_vectors ();
6390 /* Output a 'restore' instruction. */
6392 static void
6393 output_restore (rtx pat)
6395 rtx operands[3];
6397 if (! pat)
6399 fputs ("\t restore\n", asm_out_file);
6400 return;
6403 gcc_assert (GET_CODE (pat) == SET);
6405 operands[0] = SET_DEST (pat);
6406 pat = SET_SRC (pat);
6408 switch (GET_CODE (pat))
6410 case PLUS:
6411 operands[1] = XEXP (pat, 0);
6412 operands[2] = XEXP (pat, 1);
6413 output_asm_insn (" restore %r1, %2, %Y0", operands);
6414 break;
6415 case LO_SUM:
6416 operands[1] = XEXP (pat, 0);
6417 operands[2] = XEXP (pat, 1);
6418 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6419 break;
6420 case ASHIFT:
6421 operands[1] = XEXP (pat, 0);
6422 gcc_assert (XEXP (pat, 1) == const1_rtx);
6423 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6424 break;
6425 default:
6426 operands[1] = pat;
6427 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6428 break;
6432 /* Output a return. */
6434 const char *
6435 output_return (rtx_insn *insn)
6437 if (crtl->calls_eh_return)
6439 /* If the function uses __builtin_eh_return, the eh_return
6440 machinery occupies the delay slot. */
6441 gcc_assert (!final_sequence);
6443 if (flag_delayed_branch)
6445 if (!TARGET_FLAT && TARGET_V9)
6446 fputs ("\treturn\t%i7+8\n", asm_out_file);
6447 else
6449 if (!TARGET_FLAT)
6450 fputs ("\trestore\n", asm_out_file);
6452 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6455 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6457 else
6459 if (!TARGET_FLAT)
6460 fputs ("\trestore\n", asm_out_file);
6462 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6463 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6466 else if (sparc_leaf_function_p || TARGET_FLAT)
6468 /* This is a leaf or flat function so we don't have to bother restoring
6469 the register window, which frees us from dealing with the convoluted
6470 semantics of restore/return. We simply output the jump to the
6471 return address and the insn in the delay slot (if any). */
6473 return "jmp\t%%o7+%)%#";
6475 else
6477 /* This is a regular function so we have to restore the register window.
6478 We may have a pending insn for the delay slot, which will be either
6479 combined with the 'restore' instruction or put in the delay slot of
6480 the 'return' instruction. */
6482 if (final_sequence)
6484 rtx_insn *delay;
6485 rtx pat;
6487 delay = NEXT_INSN (insn);
6488 gcc_assert (delay);
6490 pat = PATTERN (delay);
6492 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6494 epilogue_renumber (&pat, 0);
6495 return "return\t%%i7+%)%#";
6497 else
6499 output_asm_insn ("jmp\t%%i7+%)", NULL);
6501 /* We're going to output the insn in the delay slot manually.
6502 Make sure to output its source location first. */
6503 PATTERN (delay) = gen_blockage ();
6504 INSN_CODE (delay) = -1;
6505 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6506 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6508 output_restore (pat);
6511 else
6513 /* The delay slot is empty. */
6514 if (TARGET_V9)
6515 return "return\t%%i7+%)\n\t nop";
6516 else if (flag_delayed_branch)
6517 return "jmp\t%%i7+%)\n\t restore";
6518 else
6519 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6523 return "";
6526 /* Output a sibling call. */
6528 const char *
6529 output_sibcall (rtx_insn *insn, rtx call_operand)
6531 rtx operands[1];
6533 gcc_assert (flag_delayed_branch);
6535 operands[0] = call_operand;
6537 if (sparc_leaf_function_p || TARGET_FLAT)
6539 /* This is a leaf or flat function so we don't have to bother restoring
6540 the register window. We simply output the jump to the function and
6541 the insn in the delay slot (if any). */
6543 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6545 if (final_sequence)
6546 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6547 operands);
6548 else
6549 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6550 it into a branch if possible. */
6551 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6552 operands);
6554 else
6556 /* This is a regular function so we have to restore the register window.
6557 We may have a pending insn for the delay slot, which will be combined
6558 with the 'restore' instruction. */
6560 output_asm_insn ("call\t%a0, 0", operands);
6562 if (final_sequence)
6564 rtx_insn *delay;
6565 rtx pat;
6567 delay = NEXT_INSN (insn);
6568 gcc_assert (delay);
6570 pat = PATTERN (delay);
6572 /* We're going to output the insn in the delay slot manually.
6573 Make sure to output its source location first. */
6574 PATTERN (delay) = gen_blockage ();
6575 INSN_CODE (delay) = -1;
6576 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6577 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6579 output_restore (pat);
6581 else
6582 output_restore (NULL_RTX);
6585 return "";
6588 /* Functions for handling argument passing.
6590 For 32-bit, the first 6 args are normally in registers and the rest are
6591 pushed. Any arg that starts within the first 6 words is at least
6592 partially passed in a register unless its data type forbids.
6594 For 64-bit, the argument registers are laid out as an array of 16 elements
6595 and arguments are added sequentially. The first 6 int args and up to the
6596 first 16 fp args (depending on size) are passed in regs.
6598 Slot Stack Integral Float Float in structure Double Long Double
6599 ---- ----- -------- ----- ------------------ ------ -----------
6600 15 [SP+248] %f31 %f30,%f31 %d30
6601 14 [SP+240] %f29 %f28,%f29 %d28 %q28
6602 13 [SP+232] %f27 %f26,%f27 %d26
6603 12 [SP+224] %f25 %f24,%f25 %d24 %q24
6604 11 [SP+216] %f23 %f22,%f23 %d22
6605 10 [SP+208] %f21 %f20,%f21 %d20 %q20
6606 9 [SP+200] %f19 %f18,%f19 %d18
6607 8 [SP+192] %f17 %f16,%f17 %d16 %q16
6608 7 [SP+184] %f15 %f14,%f15 %d14
6609 6 [SP+176] %f13 %f12,%f13 %d12 %q12
6610 5 [SP+168] %o5 %f11 %f10,%f11 %d10
6611 4 [SP+160] %o4 %f9 %f8,%f9 %d8 %q8
6612 3 [SP+152] %o3 %f7 %f6,%f7 %d6
6613 2 [SP+144] %o2 %f5 %f4,%f5 %d4 %q4
6614 1 [SP+136] %o1 %f3 %f2,%f3 %d2
6615 0 [SP+128] %o0 %f1 %f0,%f1 %d0 %q0
6617 Here SP = %sp if -mno-stack-bias or %sp+stack_bias otherwise.
6619 Integral arguments are always passed as 64-bit quantities appropriately
6620 extended.
6622 Passing of floating point values is handled as follows.
6623 If a prototype is in scope:
6624 If the value is a named argument (i.e. not part of the `...' of a
6625 stdarg function) then the value is passed in the appropriate
6626 fp reg.
6627 If the value is part of the `...' and is passed in one of the first 6
6628 slots then the value is passed in the appropriate int reg.
6629 If the value is part of the `...' and is not passed in one of the first 6
6630 slots then the value is passed in memory.
6631 If a prototype is not in scope:
6632 If the value is one of the first 6 arguments the value is passed in the
6633 appropriate integer reg and the appropriate fp reg.
6634 If the value is not one of the first 6 arguments the value is passed in
6635 the appropriate fp reg and in memory.
6638 Summary of the calling conventions implemented by GCC on the SPARC:
6640 32-bit ABI:
6641 size argument return value
6643 small integer <4 int. reg. int. reg.
6644 word 4 int. reg. int. reg.
6645 double word 8 int. reg. int. reg.
6647 _Complex small integer <8 int. reg. int. reg.
6648 _Complex word 8 int. reg. int. reg.
6649 _Complex double word 16 memory int. reg.
6651 vector integer <=8 int. reg. FP reg.
6652 vector integer >8 memory memory
6654 float 4 int. reg. FP reg.
6655 double 8 int. reg. FP reg.
6656 long double 16 memory memory
6658 _Complex float 8 memory FP reg.
6659 _Complex double 16 memory FP reg.
6660 _Complex long double 32 memory FP reg.
6662 vector float any memory memory
6664 aggregate any memory memory
6668 64-bit ABI:
6669 size argument return value
6671 small integer <8 int. reg. int. reg.
6672 word 8 int. reg. int. reg.
6673 double word 16 int. reg. int. reg.
6675 _Complex small integer <16 int. reg. int. reg.
6676 _Complex word 16 int. reg. int. reg.
6677 _Complex double word 32 memory int. reg.
6679 vector integer <=16 FP reg. FP reg.
6680 vector integer 16<s<=32 memory FP reg.
6681 vector integer >32 memory memory
6683 float 4 FP reg. FP reg.
6684 double 8 FP reg. FP reg.
6685 long double 16 FP reg. FP reg.
6687 _Complex float 8 FP reg. FP reg.
6688 _Complex double 16 FP reg. FP reg.
6689 _Complex long double 32 memory FP reg.
6691 vector float <=16 FP reg. FP reg.
6692 vector float 16<s<=32 memory FP reg.
6693 vector float >32 memory memory
6695 aggregate <=16 reg. reg.
6696 aggregate 16<s<=32 memory reg.
6697 aggregate >32 memory memory
6701 Note #1: complex floating-point types follow the extended SPARC ABIs as
6702 implemented by the Sun compiler.
6704 Note #2: integer vector types follow the conventions of the scalar
6705 floating-point types to match what is implemented by the Sun VIS SDK.
6707 Note #3: floating-point vector types follow the aggregate types
6708 conventions. */
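/* A few concrete 64-bit cases matching the tables above (illustrative
   only):

     type                      size      argument   return value
     double                    8         FP reg.    FP reg.
     struct { int a, b; }      8         reg.       reg.
     struct { char c[24]; }    24        memory     reg.
     long double (TFmode)      16        FP reg.    FP reg.  */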
6711 /* Maximum number of int regs for args. */
6712 #define SPARC_INT_ARG_MAX 6
6713 /* Maximum number of fp regs for args. */
6714 #define SPARC_FP_ARG_MAX 16
6715 /* Number of words (partially) occupied for a given size in units. */
6716 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
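/* For instance (illustrative), with the 64-bit ABI's UNITS_PER_WORD == 8:
   CEIL_NWORDS (1) == 1, CEIL_NWORDS (8) == 1, CEIL_NWORDS (12) == 2.  */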
6718 /* Handle the INIT_CUMULATIVE_ARGS macro.
6719 Initialize a variable CUM of type CUMULATIVE_ARGS
6720 for a call to a function whose data type is FNTYPE.
6721 For a library call, FNTYPE is 0. */
6723 void
6724 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6726 cum->words = 0;
6727 cum->prototype_p = fntype && prototype_p (fntype);
6728 cum->libcall_p = !fntype;
6731 /* Handle promotion of pointer and integer arguments. */
6733 static machine_mode
6734 sparc_promote_function_mode (const_tree type, machine_mode mode,
6735 int *punsignedp, const_tree, int)
6737 if (type && POINTER_TYPE_P (type))
6739 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6740 return Pmode;
6743 /* Integral arguments are passed as full words, as per the ABI. */
6744 if (GET_MODE_CLASS (mode) == MODE_INT
6745 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6746 return word_mode;
6748 return mode;
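/* Example (added for illustration): a `short' argument is promoted to
   word_mode (SImode on 32-bit, DImode on 64-bit), while a pointer is
   promoted to Pmode with *PUNSIGNEDP telling how its bits extend.  */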
6751 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6753 static bool
6754 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6756 return TARGET_ARCH64;
6759 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6760 Specify whether to pass the argument by reference. */
6762 static bool
6763 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6765 tree type = arg.type;
6766 machine_mode mode = arg.mode;
6767 if (TARGET_ARCH32)
6768 /* Original SPARC 32-bit ABI says that structures and unions,
6769 and quad-precision floats are passed by reference.
6770 All other base types are passed in registers.
6772 Extended ABI (as implemented by the Sun compiler) says that all
6773 complex floats are passed by reference. Pass complex integers
6774 in registers up to 8 bytes. More generally, enforce the 2-word
6775 cap for passing arguments in registers.
6777 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6778 vectors are passed like floats of the same size, that is in
6779 registers up to 8 bytes. Pass all vector floats by reference
6780 like structure and unions. */
6781 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6782 || mode == SCmode
6783 /* Catch CDImode, TFmode, DCmode and TCmode. */
6784 || GET_MODE_SIZE (mode) > 8
6785 || (type
6786 && VECTOR_TYPE_P (type)
6787 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6788 else
6789 /* Original SPARC 64-bit ABI says that structures and unions
6790 smaller than 16 bytes are passed in registers, as well as
6791 all other base types.
6793 Extended ABI (as implemented by the Sun compiler) says that
6794 complex floats are passed in registers up to 16 bytes. Pass
6795 all complex integers in registers up to 16 bytes. More generally,
6796 enforce the 2-word cap for passing arguments in registers.
6798 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6799 vectors are passed like floats of the same size, that is in
6800 registers (up to 16 bytes). Pass all vector floats like structure
6801 and unions. */
6802 return ((type
6803 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6804 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6805 /* Catch CTImode and TCmode. */
6806 || GET_MODE_SIZE (mode) > 16);
6809 /* Traverse the record TYPE recursively and call FUNC on its fields.
6810 NAMED is true if this is for a named parameter. DATA is passed
6811 to FUNC for each field. OFFSET is the starting position and
6812 PACKED is true if we are inside a packed record. */
6814 template <typename T, void Func (const_tree, int, bool, T*)>
6815 static void
6816 traverse_record_type (const_tree type, bool named, T *data,
6817 int offset = 0, bool packed = false)
6819 /* The ABI obviously doesn't specify how packed structures are passed.
6820 These are passed in integer regs if possible, otherwise memory. */
6821 if (!packed)
6822 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6823 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6825 packed = true;
6826 break;
6829 /* Walk the real fields, but skip those with no size or a zero size.
6830 ??? Fields with variable offset are handled as having zero offset. */
6831 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6832 if (TREE_CODE (field) == FIELD_DECL)
6834 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6835 continue;
6837 int bitpos = offset;
6838 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6839 bitpos += int_bit_position (field);
6841 tree field_type = TREE_TYPE (field);
6842 if (TREE_CODE (field_type) == RECORD_TYPE)
6843 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6844 packed);
6845 else
6847 const bool fp_type
6848 = FLOAT_TYPE_P (field_type) || VECTOR_TYPE_P (field_type);
6849 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6850 data);
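/* Sketch of the traversal (illustrative): for

     struct S { float f; struct { int i; } s; };

   Func is called once for F (FP-eligible, bit position 0) and once for I
   (bit position 32, reached through the recursive call), with FP
   eligibility cleared if the record is packed, the parameter is unnamed
   or the FPU is disabled.  */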
6855 /* Handle recursive register classifying for structure layout. */
6857 typedef struct
6859 bool fp_regs; /* true if field eligible to FP registers. */
6860 bool fp_regs_in_first_word; /* true if such field in first word. */
6861 } classify_data_t;
6863 /* A subroutine of function_arg_slotno. Classify the field. */
6865 inline void
6866 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6868 if (fp)
6870 data->fp_regs = true;
6871 if (bitpos < BITS_PER_WORD)
6872 data->fp_regs_in_first_word = true;
6876 /* Compute the slot number to pass an argument in.
6877 Return the slot number or -1 if passing on the stack.
6879 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6880 the preceding args and about the function being called.
6881 MODE is the argument's machine mode.
6882 TYPE is the data type of the argument (as a tree).
6883 This is null for libcalls where that information may
6884 not be available.
6885 NAMED is nonzero if this argument is a named parameter
6886 (otherwise it is an extra parameter matching an ellipsis).
6887 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6888 *PREGNO records the register number to use if scalar type.
6889 *PPADDING records the amount of padding needed in words. */
6891 static int
6892 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6893 const_tree type, bool named, bool incoming,
6894 int *pregno, int *ppadding)
6896 const int regbase
6897 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6898 int slotno = cum->words, regno;
6899 enum mode_class mclass = GET_MODE_CLASS (mode);
6901 /* Silence warnings in the callers. */
6902 *pregno = -1;
6903 *ppadding = -1;
6905 if (type && TREE_ADDRESSABLE (type))
6906 return -1;
6908 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6909 if (TARGET_ARCH64
6910 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6911 && (slotno & 1) != 0)
6913 slotno++;
6914 *ppadding = 1;
6916 else
6917 *ppadding = 0;
6919 /* Vector types deserve special treatment because they are polymorphic wrt
6920 their mode, depending upon whether VIS instructions are enabled. */
6921 if (type && VECTOR_TYPE_P (type))
6923 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
6925 /* The SPARC port defines no floating-point vector modes. */
6926 gcc_assert (mode == BLKmode);
6928 else
6930 /* Integer vector types should either have a vector
6931 mode or an integral mode, because we are guaranteed
6932 by pass_by_reference that their size is not greater
6933 than 16 bytes and TImode is 16-byte wide. */
6934 gcc_assert (mode != BLKmode);
6936 /* Integer vectors are handled like floats as per
6937 the Sun VIS SDK. */
6938 mclass = MODE_FLOAT;
6942 switch (mclass)
6944 case MODE_FLOAT:
6945 case MODE_COMPLEX_FLOAT:
6946 case MODE_VECTOR_INT:
6947 if (TARGET_ARCH64 && TARGET_FPU && named)
6949 /* If all arg slots are filled, then must pass on stack. */
6950 if (slotno >= SPARC_FP_ARG_MAX)
6951 return -1;
6953 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6954 /* Arguments filling only one single FP register are
6955 right-justified in the outer double FP register. */
6956 if (GET_MODE_SIZE (mode) <= 4)
6957 regno++;
6958 break;
6960 /* fallthrough */
6962 case MODE_INT:
6963 case MODE_COMPLEX_INT:
6964 /* If all arg slots are filled, then must pass on stack. */
6965 if (slotno >= SPARC_INT_ARG_MAX)
6966 return -1;
6968 regno = regbase + slotno;
6969 break;
6971 case MODE_RANDOM:
6972 /* MODE is VOIDmode when generating the actual call. */
6973 if (mode == VOIDmode)
6974 return -1;
6976 if (TARGET_ARCH64 && TARGET_FPU && named
6977 && type
6978 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6980 /* If all arg slots are filled, then must pass on stack. */
6981 if (slotno >= SPARC_FP_ARG_MAX)
6982 return -1;
6984 if (TREE_CODE (type) == RECORD_TYPE)
6986 classify_data_t data = { false, false };
6987 traverse_record_type<classify_data_t, classify_registers>
6988 (type, named, &data);
6990 if (data.fp_regs)
6992 /* If all FP slots are filled except for the last one and
6993 there is no FP field in the first word, then must pass
6994 on stack. */
6995 if (slotno >= SPARC_FP_ARG_MAX - 1
6996 && !data.fp_regs_in_first_word)
6997 return -1;
6999 else
7001 /* If all int slots are filled, then must pass on stack. */
7002 if (slotno >= SPARC_INT_ARG_MAX)
7003 return -1;
7006 /* PREGNO isn't set since both int and FP regs can be used. */
7007 return slotno;
7010 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7012 else
7014 /* If all arg slots are filled, then must pass on stack. */
7015 if (slotno >= SPARC_INT_ARG_MAX)
7016 return -1;
7018 regno = regbase + slotno;
7020 break;
7022 default:
7023 gcc_unreachable ();
7026 *pregno = regno;
7027 return slotno;
7030 /* Handle recursive register counting/assigning for structure layout. */
7032 typedef struct
7034 int slotno; /* slot number of the argument. */
7035 int regbase; /* regno of the base register. */
7036 int intoffset; /* offset of the first pending integer field. */
7037 int nregs; /* number of words passed in registers. */
7038 bool stack; /* true if part of the argument is on the stack. */
7039 rtx ret; /* return expression being built. */
7040 } assign_data_t;
7042 /* A subroutine of function_arg_record_value. Compute the number of integer
7043 registers to be assigned between PARMS->intoffset and BITPOS. Return
7044 true if at least one integer register is assigned or false otherwise. */
7046 static bool
7047 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7049 if (data->intoffset < 0)
7050 return false;
7052 const int intoffset = data->intoffset;
7053 data->intoffset = -1;
7055 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7056 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7057 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7058 int nregs = (endbit - startbit) / BITS_PER_WORD;
7060 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7062 nregs = SPARC_INT_ARG_MAX - this_slotno;
7064 /* We need to pass this field (partly) on the stack. */
7065 data->stack = 1;
7068 if (nregs <= 0)
7069 return false;
7071 *pnregs = nregs;
7072 return true;
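/* Worked example (illustrative): on 64-bit, INTOFFSET == 0 and
   BITPOS == 128 span two words, so *PNREGS == 2; if THIS_SLOTNO were
   already SPARC_INT_ARG_MAX - 1, NREGS would be capped to 1 and
   DATA->stack set, the remainder going on the stack.  */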
7075 /* A subroutine of function_arg_record_value. Compute the number and the mode
7076 of the FP registers to be assigned for FIELD. Return true if at least one
7077 FP register is assigned or false otherwise. */
7079 static bool
7080 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7081 int *pnregs, machine_mode *pmode)
7083 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7084 machine_mode mode = DECL_MODE (field);
7085 int nregs, nslots;
7087 /* Slots are counted as words while regs are counted as having the size of
7088 the (inner) mode. */
7089 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7091 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7092 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7094 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7096 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7097 nregs = 2;
7099 else
7100 nregs = 1;
7102 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7104 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7106 nslots = SPARC_FP_ARG_MAX - this_slotno;
7107 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7109 /* We need to pass this field (partly) on the stack. */
7110 data->stack = 1;
7112 if (nregs <= 0)
7113 return false;
7116 *pnregs = nregs;
7117 *pmode = mode;
7118 return true;
7121 /* A subroutine of function_arg_record_value. Count the number of registers
7122 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7124 inline void
7125 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7127 if (fp)
7129 int nregs;
7130 machine_mode mode;
7132 if (compute_int_layout (bitpos, data, &nregs))
7133 data->nregs += nregs;
7135 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7136 data->nregs += nregs;
7138 else
7140 if (data->intoffset < 0)
7141 data->intoffset = bitpos;
7145 /* A subroutine of function_arg_record_value. Assign the bits of the
7146 structure between PARMS->intoffset and BITPOS to integer registers. */
7148 static void
7149 assign_int_registers (int bitpos, assign_data_t *data)
7151 int intoffset = data->intoffset;
7152 machine_mode mode;
7153 int nregs;
7155 if (!compute_int_layout (bitpos, data, &nregs))
7156 return;
7158 /* If this is the trailing part of a word, only load that much into
7159 the register. Otherwise load the whole register. Note that in
7160 the latter case we may pick up unwanted bits. It's not a problem
7161 at the moment but we may wish to revisit this. */
7162 if (intoffset % BITS_PER_WORD != 0)
7163 mode = smallest_int_mode_for_size (BITS_PER_WORD
7164 - intoffset % BITS_PER_WORD);
7165 else
7166 mode = word_mode;
7168 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7169 unsigned int regno = data->regbase + this_slotno;
7170 intoffset /= BITS_PER_UNIT;
7174 rtx reg = gen_rtx_REG (mode, regno);
7175 XVECEXP (data->ret, 0, data->stack + data->nregs)
7176 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7177 data->nregs += 1;
7178 mode = word_mode;
7179 regno += 1;
7180 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7182 while (--nregs > 0);
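/* Note (added): the closing expression of the loop rounds INTOFFSET up
   to the next word boundary, e.g. 4 -> (4 | 7) + 1 == 8 with 8-byte
   words, so the subsequent iterations load whole words.  */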
7185 /* A subroutine of function_arg_record_value. Assign FIELD at position
7186 BITPOS to FP registers. */
7188 static void
7189 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7191 int nregs;
7192 machine_mode mode;
7194 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7195 return;
7197 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7198 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7199 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7200 regno++;
7201 int pos = bitpos / BITS_PER_UNIT;
7205 rtx reg = gen_rtx_REG (mode, regno);
7206 XVECEXP (data->ret, 0, data->stack + data->nregs)
7207 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7208 data->nregs += 1;
7209 regno += GET_MODE_SIZE (mode) / 4;
7210 pos += GET_MODE_SIZE (mode);
7212 while (--nregs > 0);
7215 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7216 the structure between PARMS->intoffset and BITPOS to registers. */
7218 inline void
7219 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7221 if (fp)
7223 assign_int_registers (bitpos, data);
7225 assign_fp_registers (field, bitpos, data);
7227 else
7229 if (data->intoffset < 0)
7230 data->intoffset = bitpos;
7234 /* Used by function_arg and function_value to implement the complex
7235 conventions of the 64-bit ABI for passing and returning structures.
7236 Return an expression valid as a return value for the FUNCTION_ARG
7237 and TARGET_FUNCTION_VALUE.
7239 TYPE is the data type of the argument (as a tree).
7240 This is null for libcalls where that information may
7241 not be available.
7242 MODE is the argument's machine mode.
7243 SLOTNO is the index number of the argument's slot in the parameter array.
7244 NAMED is true if this argument is a named parameter
7245 (otherwise it is an extra parameter matching an ellipsis).
7246 REGBASE is the regno of the base register for the parameter array. */
7248 static rtx
7249 function_arg_record_value (const_tree type, machine_mode mode,
7250 int slotno, bool named, int regbase)
7252 const int size = int_size_in_bytes (type);
7253 assign_data_t data;
7254 int nregs;
7256 data.slotno = slotno;
7257 data.regbase = regbase;
7259 /* Count how many registers we need. */
7260 data.nregs = 0;
7261 data.intoffset = 0;
7262 data.stack = false;
7263 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7265 /* Take into account pending integer fields. */
7266 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7267 data.nregs += nregs;
7269 /* Allocate the vector and handle some annoying special cases. */
7270 nregs = data.nregs;
7272 if (nregs == 0)
7274 /* ??? Empty structure has no value? Duh? */
7275 if (size <= 0)
7277 /* Though there's nothing really to store, return a word register
7278 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7279 leads to breakage due to the fact that there are zero bytes to
7280 load. */
7281 return gen_rtx_REG (mode, regbase);
7284 /* ??? C++ has structures with no fields, and yet a size. Give up
7285 for now and pass everything back in integer registers. */
7286 nregs = CEIL_NWORDS (size);
7287 if (nregs + slotno > SPARC_INT_ARG_MAX)
7288 nregs = SPARC_INT_ARG_MAX - slotno;
7291 gcc_assert (nregs > 0);
7293 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7295 /* If at least one field must be passed on the stack, generate
7296 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7297 also be passed on the stack. We can't do much better because the
7298 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7299 of structures for which the fields passed exclusively in registers
7300 are not at the beginning of the structure. */
7301 if (data.stack)
7302 XVECEXP (data.ret, 0, 0)
7303 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7305 /* Assign the registers. */
7306 data.nregs = 0;
7307 data.intoffset = 0;
7308 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7310 /* Assign pending integer fields. */
7311 assign_int_registers (size * BITS_PER_UNIT, &data);
7313 gcc_assert (data.nregs == nregs);
7315 return data.ret;
7318 /* Used by function_arg and function_value to implement the conventions
7319 of the 64-bit ABI for passing and returning unions.
7320 Return an expression valid as a return value for the FUNCTION_ARG
7321 and TARGET_FUNCTION_VALUE.
7323 SIZE is the size in bytes of the union.
7324 MODE is the argument's machine mode.
7325 SLOTNO is the index number of the argument's slot in the parameter array.
7326 REGNO is the hard register the union will be passed in. */
7328 static rtx
7329 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7331 unsigned int nwords;
7333 /* See comment in function_arg_record_value for empty structures. */
7334 if (size <= 0)
7335 return gen_rtx_REG (mode, regno);
7337 if (slotno == SPARC_INT_ARG_MAX - 1)
7338 nwords = 1;
7339 else
7340 nwords = CEIL_NWORDS (size);
7342 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7344 /* Unions are passed left-justified. */
7345 for (unsigned int i = 0; i < nwords; i++)
7346 XVECEXP (regs, 0, i)
7347 = gen_rtx_EXPR_LIST (VOIDmode,
7348 gen_rtx_REG (word_mode, regno + i),
7349 GEN_INT (UNITS_PER_WORD * i));
7351 return regs;
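/* Example (illustrative): a 12-byte union passed in slot 0 on 64-bit
   yields CEIL_NWORDS (12) == 2 word registers, i.e. for an outgoing
   argument (parallel [(%o0 at offset 0) (%o1 at offset 8)]),
   left-justified as the comment above says.  */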
7354 /* Used by function_arg and function_value to implement the conventions
7355 of the 64-bit ABI for passing and returning BLKmode vectors.
7356 Return an expression valid as a return value for the FUNCTION_ARG
7357 and TARGET_FUNCTION_VALUE.
7359 SIZE is the size in bytes of the vector.
7360 SLOTNO is the index number of the argument's slot in the parameter array.
7361 NAMED is true if this argument is a named parameter
7362 (otherwise it is an extra parameter matching an ellipsis).
7363 REGNO is the hard register the vector will be passed in. */
7365 static rtx
7366 function_arg_vector_value (int size, int slotno, bool named, int regno)
7368 const int mult = (named ? 2 : 1);
7369 unsigned int nwords;
7371 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7372 nwords = 1;
7373 else
7374 nwords = CEIL_NWORDS (size);
7376 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7378 if (size < UNITS_PER_WORD)
7379 XVECEXP (regs, 0, 0)
7380 = gen_rtx_EXPR_LIST (VOIDmode,
7381 gen_rtx_REG (SImode, regno),
7382 const0_rtx);
7383 else
7384 for (unsigned int i = 0; i < nwords; i++)
7385 XVECEXP (regs, 0, i)
7386 = gen_rtx_EXPR_LIST (VOIDmode,
7387 gen_rtx_REG (word_mode, regno + i * mult),
7388 GEN_INT (i * UNITS_PER_WORD));
7390 return regs;
7393 /* Determine where to put an argument to a function.
7394 Value is zero to push the argument on the stack,
7395 or a hard register in which to store the argument.
7397 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7398 the preceding args and about the function being called.
7399 ARG is a description of the argument.
7400 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7401 TARGET_FUNCTION_INCOMING_ARG. */
7403 static rtx
7404 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7405 bool incoming)
7407 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7408 const int regbase
7409 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7410 int slotno, regno, padding;
7411 tree type = arg.type;
7412 machine_mode mode = arg.mode;
7413 enum mode_class mclass = GET_MODE_CLASS (mode);
7414 bool named = arg.named;
7416 slotno
7417 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7418 if (slotno == -1)
7419 return 0;
7421 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7422 if (type && VECTOR_INTEGER_TYPE_P (type))
7423 mclass = MODE_FLOAT;
7425 if (TARGET_ARCH32)
7426 return gen_rtx_REG (mode, regno);
7428 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7429 and are promoted to registers if possible. */
7430 if (type && TREE_CODE (type) == RECORD_TYPE)
7432 const int size = int_size_in_bytes (type);
7433 gcc_assert (size <= 16);
7435 return function_arg_record_value (type, mode, slotno, named, regbase);
7438 /* Unions up to 16 bytes in size are passed in integer registers. */
7439 else if (type && TREE_CODE (type) == UNION_TYPE)
7441 const int size = int_size_in_bytes (type);
7442 gcc_assert (size <= 16);
7444 return function_arg_union_value (size, mode, slotno, regno);
7447 /* Floating-point vectors up to 16 bytes are passed in registers. */
7448 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7450 const int size = int_size_in_bytes (type);
7451 gcc_assert (size <= 16);
7453 return function_arg_vector_value (size, slotno, named, regno);
7456 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7457 but also have the slot allocated for them.
7458 If no prototype is in scope fp values in register slots get passed
7459 in two places, either fp regs and int regs or fp regs and memory. */
7460 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7461 && SPARC_FP_REG_P (regno))
7463 rtx reg = gen_rtx_REG (mode, regno);
7464 if (cum->prototype_p || cum->libcall_p)
7465 return reg;
7466 else
7468 rtx v0, v1;
7470 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7472 int intreg;
7474 /* On incoming, we don't need to know that the value
7475 is passed in %f0 and %i0, and it confuses other parts,
7476 causing needless spillage even in the simplest cases. */
7477 if (incoming)
7478 return reg;
7480 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7481 + (regno - SPARC_FP_ARG_FIRST) / 2);
7483 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7484 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7485 const0_rtx);
7486 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7488 else
7490 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7491 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7492 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7497 /* All other aggregate types are passed in an integer register in a mode
7498 corresponding to the size of the type. */
7499 else if (type && AGGREGATE_TYPE_P (type))
7501 const int size = int_size_in_bytes (type);
7502 gcc_assert (size <= 16);
7504 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7507 return gen_rtx_REG (mode, regno);
7510 /* Handle the TARGET_FUNCTION_ARG target hook. */
7512 static rtx
7513 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7515 return sparc_function_arg_1 (cum, arg, false);
7518 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7520 static rtx
7521 sparc_function_incoming_arg (cumulative_args_t cum,
7522 const function_arg_info &arg)
7524 return sparc_function_arg_1 (cum, arg, true);
7527 /* For sparc64, objects requiring 16-byte alignment are passed that way. */
7529 static unsigned int
7530 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7532 return ((TARGET_ARCH64
7533 && (GET_MODE_ALIGNMENT (mode) == 128
7534 || (type && TYPE_ALIGN (type) == 128)))
7535 ? 128
7536 : PARM_BOUNDARY);
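/* Example (illustrative): on 64-bit, a __int128 or long double argument
   carries 128-bit alignment and thus gets a 128-bit boundary (an even
   slot, see function_arg_slotno); everything else keeps PARM_BOUNDARY.  */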
7539 /* For an arg passed partly in registers and partly in memory,
7540 this is the number of bytes of registers used.
7541 For args passed entirely in registers or entirely in memory, zero.
7543 Any arg that starts in the first 6 regs but won't entirely fit in them
7544 needs partial registers on v8. On v9, structures with integer
7545 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7546 values that begin in the last fp reg [where "last fp reg" varies with the
7547 mode] will be split between that reg and memory. */
7549 static int
7550 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7552 int slotno, regno, padding;
7554 /* We pass false for incoming here; it doesn't matter. */
7555 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7556 arg.named, false, &regno, &padding);
7558 if (slotno == -1)
7559 return 0;
7561 if (TARGET_ARCH32)
7563 /* We are guaranteed by pass_by_reference that the size of the
7564 argument is not greater than 8 bytes, so we only need to return
7565 one word if the argument is partially passed in registers. */
7566 const int size = GET_MODE_SIZE (arg.mode);
7568 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7569 return UNITS_PER_WORD;
7571 else
7573 /* We are guaranteed by pass_by_reference that the size of the
7574 argument is not greater than 16 bytes, so we only need to return
7575 one word if the argument is partially passed in registers. */
7576 if (arg.aggregate_type_p ())
7578 const int size = int_size_in_bytes (arg.type);
7580 if (size > UNITS_PER_WORD
7581 && (slotno == SPARC_INT_ARG_MAX - 1
7582 || slotno == SPARC_FP_ARG_MAX - 1))
7583 return UNITS_PER_WORD;
7585 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7586 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7587 || (arg.type && VECTOR_TYPE_P (arg.type)))
7588 && !(TARGET_FPU && arg.named)))
7590 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7591 ? int_size_in_bytes (arg.type)
7592 : GET_MODE_SIZE (arg.mode);
7594 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7595 return UNITS_PER_WORD;
7597 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7598 || (arg.type && VECTOR_TYPE_P (arg.type)))
7600 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7601 ? int_size_in_bytes (arg.type)
7602 : GET_MODE_SIZE (arg.mode);
7604 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7605 return UNITS_PER_WORD;
7609 return 0;
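/* Worked example (illustrative): on 64-bit, a 16-byte struct whose first
   word lands in the last int slot (slotno == SPARC_INT_ARG_MAX - 1) has
   one word in %o5 and one on the stack, so the hook returns
   UNITS_PER_WORD (8); arguments passed entirely in registers or entirely
   in memory return 0.  */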
7612 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7613 Update the data in CUM to advance over argument ARG. */
7615 static void
7616 sparc_function_arg_advance (cumulative_args_t cum_v,
7617 const function_arg_info &arg)
7619 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7620 tree type = arg.type;
7621 machine_mode mode = arg.mode;
7622 int regno, padding;
7624 /* We pass false for incoming here; it doesn't matter. */
7625 function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7627 /* If argument requires leading padding, add it. */
7628 cum->words += padding;
7630 if (TARGET_ARCH32)
7631 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7632 else
7634 /* For types that can have BLKmode, get the size from the type. */
7635 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7637 const int size = int_size_in_bytes (type);
7639 /* See comment in function_arg_record_value for empty structures. */
7640 if (size <= 0)
7641 cum->words++;
7642 else
7643 cum->words += CEIL_NWORDS (size);
7645 else
7646 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7650 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7651 are always stored left shifted in their argument slot. */
7653 static pad_direction
7654 sparc_function_arg_padding (machine_mode mode, const_tree type)
7656 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7657 return PAD_UPWARD;
7659 /* Fall back to the default. */
7660 return default_function_arg_padding (mode, type);
7663 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7664 Specify whether to return the return value in memory. */
7666 static bool
7667 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7669 if (TARGET_ARCH32)
7670 /* Original SPARC 32-bit ABI says that structures and unions, and
7671 quad-precision floats are returned in memory. But note that the
7672 first part is implemented through -fpcc-struct-return being the
7673 default, so here we only implement -freg-struct-return instead.
7674 All other base types are returned in registers.
7676 Extended ABI (as implemented by the Sun compiler) says that
7677 all complex floats are returned in registers (8 FP registers
7678 at most for '_Complex long double'). Return all complex integers
7679 in registers (4 at most for '_Complex long long').
7681 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7682 integers are returned like floats of the same size, that is in
7683 registers up to 8 bytes and in memory otherwise. Return all
7684 vector floats in memory like structure and unions; note that
7685 they always have BLKmode like the latter. */
7686 return (TYPE_MODE (type) == BLKmode
7687 || TYPE_MODE (type) == TFmode
7688 || (TREE_CODE (type) == VECTOR_TYPE
7689 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7690 else
7691 /* Original SPARC 64-bit ABI says that structures and unions
7692 up to 32 bytes in size are returned in registers, as well as
7693 all other base types.
7695 Extended ABI (as implemented by the Sun compiler) says that all
7696 complex floats are returned in registers (8 FP registers at most
7697 for '_Complex long double'). Return all complex integers in
7698 registers (4 at most for '_Complex TItype').
7700 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7701 integers are returned like floats of the same size, that is in
7702 registers. Return all vector floats like structure and unions;
7703 note that they always have BLKmode like the latter. */
7704 return (TYPE_MODE (type) == BLKmode
7705 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
7708 /* Handle the TARGET_STRUCT_VALUE target hook.
7709 Return where to find the structure return value address. */
7711 static rtx
7712 sparc_struct_value_rtx (tree fndecl, int incoming)
7714 if (TARGET_ARCH64)
7715 return NULL_RTX;
7716 else
7718 rtx mem;
7720 if (incoming)
7721 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7722 STRUCT_VALUE_OFFSET));
7723 else
7724 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7725 STRUCT_VALUE_OFFSET));
7727 /* Only follow the SPARC ABI for fixed-size structure returns.
7728 Variable size structure returns are handled per the normal
7729 procedures in GCC. This is enabled by -mstd-struct-return. */
7730 if (incoming == 2
7731 && sparc_std_struct_return
7732 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7733 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7735 /* We must check and adjust the return address, since it is optional
7736 whether the return object is really provided. */
7737 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7738 rtx scratch = gen_reg_rtx (SImode);
7739 rtx_code_label *endlab = gen_label_rtx ();
7741 /* Calculate the return object size. */
7742 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7743 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7744 /* Construct a temporary return value. */
7745 rtx temp_val
7746 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7748 /* Implement SPARC 32-bit psABI callee return struct checking:
7750 Fetch the instruction where we will return to and see if
7751 it's an unimp instruction (the most significant 10 bits
7752 will be zero). */
7753 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7754 plus_constant (Pmode,
7755 ret_reg, 8)));
7756 /* Assume the size is valid and pre-adjust. */
7757 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7758 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7759 0, endlab);
7760 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7761 /* Write the address of the memory pointed to by temp_val into
7762 the memory pointed to by mem. */
7763 emit_move_insn (mem, XEXP (temp_val, 0));
7764 emit_label (endlab);
7767 return mem;
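/* A sketch of the caller-side convention being checked (the sizes are
   hypothetical): for "struct S f (void);" with sizeof (struct S) == 8,
   the 32-bit psABI caller emits

       call    f
        nop
       unimp   8          ! low 12 bits encode the struct size

   and the callee then returns to the return address + 12 instead of + 8,
   which is what the pre-adjustment of the return address above implements
   (see also the '%)' output code in sparc_print_operand).  */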
7771 /* Handle the TARGET_FUNCTION_VALUE and TARGET_LIBCALL_VALUE target hooks.
7772 For v9, function return values are subject to the same rules as arguments,
7773 except that up to 32 bytes may be returned in registers. */
7775 static rtx
7776 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7778 /* Beware that the two values are swapped here wrt function_arg. */
7779 const int regbase
7780 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7781 enum mode_class mclass = GET_MODE_CLASS (mode);
7782 int regno;
7784 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7785 Note that integer vectors larger than 16 bytes have BLKmode so
7786 they need to be handled like floating-point vectors below. */
7787 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7788 mclass = MODE_FLOAT;
7790 if (TARGET_ARCH64 && type)
7792 /* Structures up to 32 bytes in size are returned in registers. */
7793 if (TREE_CODE (type) == RECORD_TYPE)
7795 const int size = int_size_in_bytes (type);
7796 gcc_assert (size <= 32);
7798 return function_arg_record_value (type, mode, 0, true, regbase);
7801 /* Unions up to 32 bytes in size are returned in integer registers. */
7802 else if (TREE_CODE (type) == UNION_TYPE)
7804 const int size = int_size_in_bytes (type);
7805 gcc_assert (size <= 32);
7807 return function_arg_union_value (size, mode, 0, regbase);
7810 /* Vectors up to 32 bytes are returned in FP registers. */
7811 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7813 const int size = int_size_in_bytes (type);
7814 gcc_assert (size <= 32);
7816 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7819 /* Objects that require it are returned in FP registers. */
7820 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7821 ;
7823 /* All other aggregate types are returned in an integer register in a
7824 mode corresponding to the size of the type. */
7825 else if (AGGREGATE_TYPE_P (type))
7827 /* All other aggregate types are passed in an integer register
7828 in a mode corresponding to the size of the type. */
7829 const int size = int_size_in_bytes (type);
7830 gcc_assert (size <= 32);
7832 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7834 /* ??? We probably should have made the same ABI change in
7835 3.4.0 as the one we made for unions. The latter was
7836 required by the SCD though, while the former is not
7837 specified, so we favored compatibility and efficiency.
7839 Now we're stuck for aggregates larger than 16 bytes,
7840 because OImode vanished in the meantime. Let's not
7841 try to be unduly clever, and simply follow the ABI
7842 for unions in that case. */
7843 if (mode == BLKmode)
7844 return function_arg_union_value (size, mode, 0, regbase);
7845 else
7846 mclass = MODE_INT;
7849 /* We should only have pointer and integer types at this point. This
7850 must match sparc_promote_function_mode. */
7851 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7852 mode = word_mode;
7855 /* We should only have pointer and integer types at this point, except with
7856 -freg-struct-return. This must match sparc_promote_function_mode. */
7857 else if (TARGET_ARCH32
7858 && !(type && AGGREGATE_TYPE_P (type))
7859 && mclass == MODE_INT
7860 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7861 mode = word_mode;
7863 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7864 regno = SPARC_FP_ARG_FIRST;
7865 else
7866 regno = regbase;
7868 return gen_rtx_REG (mode, regno);
7871 /* Handle TARGET_FUNCTION_VALUE.
7872 On the SPARC, the value is found in the first "output" register, but the
7873 called function leaves it in the first "input" register. */
7875 static rtx
7876 sparc_function_value (const_tree valtype,
7877 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7878 bool outgoing)
7880 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7883 /* Handle TARGET_LIBCALL_VALUE. */
7885 static rtx
7886 sparc_libcall_value (machine_mode mode,
7887 const_rtx fun ATTRIBUTE_UNUSED)
7889 return sparc_function_value_1 (NULL_TREE, mode, false);
7892 /* Handle FUNCTION_VALUE_REGNO_P.
7893 On the SPARC, the first "output" reg is used for integer values, and the
7894 first floating point register is used for floating point values. */
7896 static bool
7897 sparc_function_value_regno_p (const unsigned int regno)
7899 return (regno == 8 || (TARGET_FPU && regno == 32));
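/* For example, "int f (void)" returns in %o0 (hard register 8, seen as
   %i0 by the callee) and "float f (void)" returns in %f0 (hard register
   32) when the FPU is available.  */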
7902 /* Do what is necessary for `va_start'. We look at the current function
7903 to determine if stdarg or varargs is used and return the address of
7904 the first unnamed parameter. */
7906 static rtx
7907 sparc_builtin_saveregs (void)
7909 int first_reg = crtl->args.info.words;
7910 rtx address;
7911 int regno;
7913 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7914 emit_move_insn (gen_rtx_MEM (word_mode,
7915 gen_rtx_PLUS (Pmode,
7916 frame_pointer_rtx,
7917 GEN_INT (FIRST_PARM_OFFSET (0)
7918 + (UNITS_PER_WORD
7919 * regno)))),
7920 gen_rtx_REG (word_mode,
7921 SPARC_INCOMING_INT_ARG_FIRST + regno));
7923 address = gen_rtx_PLUS (Pmode,
7924 frame_pointer_rtx,
7925 GEN_INT (FIRST_PARM_OFFSET (0)
7926 + UNITS_PER_WORD * first_reg));
7928 return address;
7931 /* Implement `va_start' for stdarg. */
7933 static void
7934 sparc_va_start (tree valist, rtx nextarg)
7936 nextarg = expand_builtin_saveregs ();
7937 std_expand_builtin_va_start (valist, nextarg);
7940 /* Implement `va_arg' for stdarg. */
7942 static tree
7943 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7944 gimple_seq *post_p)
7946 HOST_WIDE_INT size, rsize, align;
7947 tree addr, incr;
7948 bool indirect;
7949 tree ptrtype = build_pointer_type (type);
7951 if (pass_va_arg_by_reference (type))
7953 indirect = true;
7954 size = rsize = UNITS_PER_WORD;
7955 align = 0;
7957 else
7959 indirect = false;
7960 size = int_size_in_bytes (type);
7961 rsize = ROUND_UP (size, UNITS_PER_WORD);
7962 align = 0;
7964 if (TARGET_ARCH64)
7966 /* For SPARC64, objects requiring 16-byte alignment get it. */
7967 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7968 align = 2 * UNITS_PER_WORD;
7970 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7971 are left-justified in their slots. */
7972 if (AGGREGATE_TYPE_P (type))
7974 if (size == 0)
7975 size = rsize = UNITS_PER_WORD;
7976 else
7977 size = rsize;
7982 incr = valist;
7983 if (align)
7985 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7986 incr = fold_convert (sizetype, incr);
7987 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7988 size_int (-align));
7989 incr = fold_convert (ptr_type_node, incr);
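/* The statements above compute incr = (incr + align - 1) & -align;
   e.g. with align == 16, an address of 0x1008 is rounded up to 0x1010.  */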
7992 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7993 addr = incr;
7995 if (BYTES_BIG_ENDIAN && size < rsize)
7996 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
7998 if (indirect)
8000 addr = fold_convert (build_pointer_type (ptrtype), addr);
8001 addr = build_va_arg_indirect_ref (addr);
8004 /* If the address isn't aligned properly for the type, we need a temporary.
8005 FIXME: This is inefficient; usually we can do this in registers. */
8006 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8008 tree tmp = create_tmp_var (type, "va_arg_tmp");
8009 tree dest_addr = build_fold_addr_expr (tmp);
8010 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8011 3, dest_addr, addr, size_int (rsize));
8012 TREE_ADDRESSABLE (tmp) = 1;
8013 gimplify_and_add (copy, pre_p);
8014 addr = dest_addr;
8017 else
8018 addr = fold_convert (ptrtype, addr);
8020 incr = fold_build_pointer_plus_hwi (incr, rsize);
8021 gimplify_assign (valist, incr, post_p);
8023 return build_va_arg_indirect_ref (addr);
8026 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8027 Specify whether the vector mode is supported by the hardware. */
8029 static bool
8030 sparc_vector_mode_supported_p (machine_mode mode)
8032 return TARGET_VIS && VECTOR_MODE_P (mode);
8035 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8037 static machine_mode
8038 sparc_preferred_simd_mode (scalar_mode mode)
8040 if (TARGET_VIS)
8041 switch (mode)
8043 case E_SImode:
8044 return V2SImode;
8045 case E_HImode:
8046 return V4HImode;
8047 case E_QImode:
8048 return V8QImode;
8050 default:;
8053 return word_mode;
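/* So with VIS enabled, a loop over "short" data, for example, is
   vectorized four elements at a time in the 8-byte V4HImode; returning
   word_mode otherwise tells the vectorizer there is no preferred SIMD
   mode.  */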
8056 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8058 static bool
8059 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8061 /* Do not fold unconditional jumps that have been created for crossing
8062 partition boundaries. */
8063 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8064 return false;
8066 return true;
8069 /* Return the string to output an unconditional branch to LABEL, which is
8070 the operand number of the label.
8072 DEST is the destination insn (i.e. the label), INSN is the source. */
8074 const char *
8075 output_ubranch (rtx dest, rtx_insn *insn)
8077 static char string[64];
8078 bool v9_form = false;
8079 int delta;
8080 char *p;
8082 /* Even if we are trying to use cbcond for this, evaluate
8083 whether we can use V9 branches as our backup plan. */
8084 delta = 5000000;
8085 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8086 delta = (INSN_ADDRESSES (INSN_UID (dest))
8087 - INSN_ADDRESSES (INSN_UID (insn)));
8089 /* Leave some instructions for "slop". */
8090 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8091 v9_form = true;
8093 if (TARGET_CBCOND)
8095 bool emit_nop = emit_cbcond_nop (insn);
8096 bool far = false;
8097 const char *rval;
8099 if (delta < -500 || delta > 500)
8100 far = true;
8102 if (far)
8104 if (v9_form)
8105 rval = "ba,a,pt\t%%xcc, %l0";
8106 else
8107 rval = "b,a\t%l0";
8109 else
8111 if (emit_nop)
8112 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8113 else
8114 rval = "cwbe\t%%g0, %%g0, %l0";
8116 return rval;
8119 if (v9_form)
8120 strcpy (string, "ba%*,pt\t%%xcc, ");
8121 else
8122 strcpy (string, "b%*\t");
8124 p = strchr (string, '\0');
8125 *p++ = '%';
8126 *p++ = 'l';
8127 *p++ = '0';
8128 *p++ = '%';
8129 *p++ = '(';
8130 *p = '\0';
8132 return string;
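/* A sketch of how the '%*' and '%(' codes expand (handled by
   sparc_print_operand below), using a hypothetical label .LC30: with an
   empty delay slot, "b%*\t%l0%(" becomes "b,a .LC30" when optimizing
   for a pre-V9 CPU (the annul flag makes a delay-slot nop unnecessary)
   and "b .LC30" followed by an explicit nop otherwise.  */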
8135 /* Return the string to output a conditional branch to LABEL, which is
8136 the operand number of the label. OP is the conditional expression.
8137 XEXP (OP, 0) is assumed to be a condition code register (integer or
8138 floating point) and its mode specifies what kind of comparison we made.
8140 DEST is the destination insn (i.e. the label), INSN is the source.
8142 REVERSED is nonzero if we should reverse the sense of the comparison.
8144 ANNUL is nonzero if we should generate an annulling branch. */
8146 const char *
8147 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8148 rtx_insn *insn)
8150 static char string[64];
8151 enum rtx_code code = GET_CODE (op);
8152 rtx cc_reg = XEXP (op, 0);
8153 machine_mode mode = GET_MODE (cc_reg);
8154 const char *labelno, *branch;
8155 int spaces = 8, far;
8156 char *p;
8158 /* v9 branches are limited to +-1MB. If it is too far away,
8159 change
8161 bne,pt %xcc, .LC30
8165 be,pn %xcc, .+12
8167 ba .LC30
8171 fbne,a,pn %fcc2, .LC29
8175 fbe,pt %fcc2, .+16
8177 ba .LC29 */
8179 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8180 if (reversed ^ far)
8182 /* Reversal of FP compares takes care -- an ordered compare
8183 becomes an unordered compare and vice versa. */
8184 if (mode == CCFPmode || mode == CCFPEmode)
8185 code = reverse_condition_maybe_unordered (code);
8186 else
8187 code = reverse_condition (code);
8190 /* Start by writing the branch condition. */
8191 if (mode == CCFPmode || mode == CCFPEmode)
8193 switch (code)
8195 case NE:
8196 branch = "fbne";
8197 break;
8198 case EQ:
8199 branch = "fbe";
8200 break;
8201 case GE:
8202 branch = "fbge";
8203 break;
8204 case GT:
8205 branch = "fbg";
8206 break;
8207 case LE:
8208 branch = "fble";
8209 break;
8210 case LT:
8211 branch = "fbl";
8212 break;
8213 case UNORDERED:
8214 branch = "fbu";
8215 break;
8216 case ORDERED:
8217 branch = "fbo";
8218 break;
8219 case UNGT:
8220 branch = "fbug";
8221 break;
8222 case UNLT:
8223 branch = "fbul";
8224 break;
8225 case UNEQ:
8226 branch = "fbue";
8227 break;
8228 case UNGE:
8229 branch = "fbuge";
8230 break;
8231 case UNLE:
8232 branch = "fbule";
8233 break;
8234 case LTGT:
8235 branch = "fblg";
8236 break;
8237 default:
8238 gcc_unreachable ();
8241 /* ??? !v9: FP branches cannot be preceded by another floating point
8242 insn. Because there is currently no concept of pre-delay slots,
8243 we can fix this only by always emitting a nop before a floating
8244 point branch. */
8246 string[0] = '\0';
8247 if (! TARGET_V9)
8248 strcpy (string, "nop\n\t");
8249 strcat (string, branch);
8251 else
8253 switch (code)
8255 case NE:
8256 if (mode == CCVmode || mode == CCXVmode)
8257 branch = "bvs";
8258 else
8259 branch = "bne";
8260 break;
8261 case EQ:
8262 if (mode == CCVmode || mode == CCXVmode)
8263 branch = "bvc";
8264 else
8265 branch = "be";
8266 break;
8267 case GE:
8268 if (mode == CCNZmode || mode == CCXNZmode)
8269 branch = "bpos";
8270 else
8271 branch = "bge";
8272 break;
8273 case GT:
8274 branch = "bg";
8275 break;
8276 case LE:
8277 branch = "ble";
8278 break;
8279 case LT:
8280 if (mode == CCNZmode || mode == CCXNZmode)
8281 branch = "bneg";
8282 else
8283 branch = "bl";
8284 break;
8285 case GEU:
8286 branch = "bgeu";
8287 break;
8288 case GTU:
8289 branch = "bgu";
8290 break;
8291 case LEU:
8292 branch = "bleu";
8293 break;
8294 case LTU:
8295 branch = "blu";
8296 break;
8297 default:
8298 gcc_unreachable ();
8300 strcpy (string, branch);
8302 spaces -= strlen (branch);
8303 p = strchr (string, '\0');
8305 /* Now add the annulling, the label, and a possible nop. */
8306 if (annul && ! far)
8308 strcpy (p, ",a");
8309 p += 2;
8310 spaces -= 2;
8313 if (TARGET_V9)
8315 rtx note;
8316 int v8 = 0;
8318 if (! far && insn && INSN_ADDRESSES_SET_P ())
8320 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8321 - INSN_ADDRESSES (INSN_UID (insn)));
8322 /* Leave some instructions for "slop". */
8323 if (delta < -260000 || delta >= 260000)
8324 v8 = 1;
8327 switch (mode)
8329 case E_CCmode:
8330 case E_CCNZmode:
8331 case E_CCCmode:
8332 case E_CCVmode:
8333 labelno = "%%icc, ";
8334 if (v8)
8335 labelno = "";
8336 break;
8337 case E_CCXmode:
8338 case E_CCXNZmode:
8339 case E_CCXCmode:
8340 case E_CCXVmode:
8341 labelno = "%%xcc, ";
8342 gcc_assert (!v8);
8343 break;
8344 case E_CCFPmode:
8345 case E_CCFPEmode:
8347 static char v9_fcc_labelno[] = "%%fccX, ";
8348 /* Set the char indicating the number of the fcc reg to use. */
8349 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8350 labelno = v9_fcc_labelno;
8351 if (v8)
8353 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8354 labelno = "";
8357 break;
8358 default:
8359 gcc_unreachable ();
8362 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8364 strcpy (p,
8365 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8366 >= profile_probability::even ()) ^ far)
8367 ? ",pt" : ",pn");
8368 p += 3;
8369 spaces -= 3;
8372 else
8373 labelno = "";
8375 if (spaces > 0)
8376 *p++ = '\t';
8377 else
8378 *p++ = ' ';
8379 strcpy (p, labelno);
8380 p = strchr (p, '\0');
8381 if (far)
8383 strcpy (p, ".+12\n\t nop\n\tb\t");
8384 /* Skip the next insn if requested or
8385 if we know that it will be a nop. */
8386 if (annul || ! final_sequence)
8387 p[3] = '6';
8388 p += 14;
8390 *p++ = '%';
8391 *p++ = 'l';
8392 *p++ = label + '0';
8393 *p++ = '%';
8394 *p++ = '#';
8395 *p = '\0';
8397 return string;
8400 /* Emit a library call comparison between floating point X and Y.
8401 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8402 Return the new operator to be used in the comparison sequence.
8404 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8405 values as arguments instead of the TFmode registers themselves;
8406 that's why we cannot call emit_float_lib_cmp. */
8408 rtx
8409 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8411 const char *qpfunc;
8412 rtx slot0, slot1, result, tem, tem2, libfunc;
8413 machine_mode mode;
8414 enum rtx_code new_comparison;
8416 switch (comparison)
8418 case EQ:
8419 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8420 break;
8422 case NE:
8423 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8424 break;
8426 case GT:
8427 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8428 break;
8430 case GE:
8431 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8432 break;
8434 case LT:
8435 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8436 break;
8438 case LE:
8439 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8440 break;
8442 case ORDERED:
8443 case UNORDERED:
8444 case UNGT:
8445 case UNLT:
8446 case UNEQ:
8447 case UNGE:
8448 case UNLE:
8449 case LTGT:
8450 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8451 break;
8453 default:
8454 gcc_unreachable ();
8457 if (TARGET_ARCH64)
8459 if (MEM_P (x))
8461 tree expr = MEM_EXPR (x);
8462 if (expr)
8463 mark_addressable (expr);
8464 slot0 = x;
8466 else
8468 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8469 emit_move_insn (slot0, x);
8472 if (MEM_P (y))
8474 tree expr = MEM_EXPR (y);
8475 if (expr)
8476 mark_addressable (expr);
8477 slot1 = y;
8479 else
8481 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8482 emit_move_insn (slot1, y);
8485 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8486 emit_library_call (libfunc, LCT_NORMAL,
8487 DImode,
8488 XEXP (slot0, 0), Pmode,
8489 XEXP (slot1, 0), Pmode);
8490 mode = DImode;
8492 else
8494 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8495 emit_library_call (libfunc, LCT_NORMAL,
8496 SImode,
8497 x, TFmode, y, TFmode);
8498 mode = SImode;
8502 /* Immediately move the result of the libcall into a pseudo
8503 register so reload doesn't clobber the value if it needs
8504 the return register for a spill reg. */
8505 result = gen_reg_rtx (mode);
8506 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8508 switch (comparison)
8510 default:
8511 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8512 case ORDERED:
8513 case UNORDERED:
8514 new_comparison = (comparison == UNORDERED ? EQ : NE);
8515 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8516 case UNGT:
8517 case UNGE:
8518 new_comparison = (comparison == UNGT ? GT : NE);
8519 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8520 case UNLE:
8521 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8522 case UNLT:
8523 tem = gen_reg_rtx (mode);
8524 if (TARGET_ARCH32)
8525 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8526 else
8527 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8528 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8529 case UNEQ:
8530 case LTGT:
8531 tem = gen_reg_rtx (mode);
8532 if (TARGET_ARCH32)
8533 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8534 else
8535 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8536 tem2 = gen_reg_rtx (mode);
8537 if (TARGET_ARCH32)
8538 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8539 else
8540 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8541 new_comparison = (comparison == UNEQ ? EQ : NE);
8542 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8545 gcc_unreachable ();
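/* The mapping above relies on the usual _Q_cmp/_Qp_cmp result encoding
   (stated here as an assumption for clarity): 0 = equal, 1 = less,
   2 = greater, 3 = unordered.  Hence UNORDERED is result == 3, UNGT is
   result > 1, UNGE is result != 1, UNLE is result != 2, UNLT tests the
   low bit (less or unordered), and UNEQ checks ((result + 1) & 2) == 0,
   i.e. result is 0 or 3.  */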
8548 /* Generate an unsigned DImode to FP conversion. This is the same code
8549 optabs would emit if we didn't have TFmode patterns. */
8551 void
8552 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8554 rtx i0, i1, f0, in, out;
8556 out = operands[0];
8557 in = force_reg (DImode, operands[1]);
8558 rtx_code_label *neglab = gen_label_rtx ();
8559 rtx_code_label *donelab = gen_label_rtx ();
8560 i0 = gen_reg_rtx (DImode);
8561 i1 = gen_reg_rtx (DImode);
8562 f0 = gen_reg_rtx (mode);
8564 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8566 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8567 emit_jump_insn (gen_jump (donelab));
8568 emit_barrier ();
8570 emit_label (neglab);
8572 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8573 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8574 emit_insn (gen_iordi3 (i0, i0, i1));
8575 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8576 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8578 emit_label (donelab);
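/* Roughly equivalent C for the expansion above (a sketch only):

     if ((long long) x >= 0)
       result = (double) (long long) x;
     else
       {
         long long half = (x >> 1) | (x & 1);  // halve; keep a round bit
         result = (double) half + (double) half;
       }
*/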
8581 /* Generate an FP to unsigned DImode conversion. This is the same code
8582 optabs would emit if we didn't have TFmode patterns. */
8584 void
8585 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8587 rtx i0, i1, f0, in, out, limit;
8589 out = operands[0];
8590 in = force_reg (mode, operands[1]);
8591 rtx_code_label *neglab = gen_label_rtx ();
8592 rtx_code_label *donelab = gen_label_rtx ();
8593 i0 = gen_reg_rtx (DImode);
8594 i1 = gen_reg_rtx (DImode);
8595 limit = gen_reg_rtx (mode);
8596 f0 = gen_reg_rtx (mode);
8598 emit_move_insn (limit,
8599 const_double_from_real_value (
8600 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8601 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8603 emit_insn (gen_rtx_SET (out,
8604 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8605 emit_jump_insn (gen_jump (donelab));
8606 emit_barrier ();
8608 emit_label (neglab);
8610 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8611 emit_insn (gen_rtx_SET (i0,
8612 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8613 emit_insn (gen_movdi (i1, const1_rtx));
8614 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8615 emit_insn (gen_xordi3 (out, i0, i1));
8617 emit_label (donelab);
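/* Roughly equivalent C for the expansion above (a sketch only):

     if (f < 9223372036854775808.0)            // 2^63
       result = (long long) f;
     else
       result = (long long) (f - 9223372036854775808.0) ^ (1ULL << 63);
*/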
8620 /* Return the string to output a compare and branch instruction to DEST.
8621 DEST is the destination insn (i.e. the label), INSN is the source,
8622 and OP is the conditional expression. */
8624 const char *
8625 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8627 machine_mode mode = GET_MODE (XEXP (op, 0));
8628 enum rtx_code code = GET_CODE (op);
8629 const char *cond_str, *tmpl;
8630 int far, emit_nop, len;
8631 static char string[64];
8632 char size_char;
8634 /* Compare and Branch is limited to +-2KB. If it is too far away,
8635 change
8637 cxbne X, Y, .LC30
8641 cxbe X, Y, .+16
8643 ba,pt xcc, .LC30
8644 nop */
8646 len = get_attr_length (insn);
8648 far = len == 4;
8649 emit_nop = len == 2;
8651 if (far)
8652 code = reverse_condition (code);
8654 size_char = ((mode == SImode) ? 'w' : 'x');
8656 switch (code)
8658 case NE:
8659 cond_str = "ne";
8660 break;
8662 case EQ:
8663 cond_str = "e";
8664 break;
8666 case GE:
8667 cond_str = "ge";
8668 break;
8670 case GT:
8671 cond_str = "g";
8672 break;
8674 case LE:
8675 cond_str = "le";
8676 break;
8678 case LT:
8679 cond_str = "l";
8680 break;
8682 case GEU:
8683 cond_str = "cc";
8684 break;
8686 case GTU:
8687 cond_str = "gu";
8688 break;
8690 case LEU:
8691 cond_str = "leu";
8692 break;
8694 case LTU:
8695 cond_str = "cs";
8696 break;
8698 default:
8699 gcc_unreachable ();
8702 if (far)
8704 int veryfar = 1, delta;
8706 if (INSN_ADDRESSES_SET_P ())
8708 delta = (INSN_ADDRESSES (INSN_UID (dest))
8709 - INSN_ADDRESSES (INSN_UID (insn)));
8710 /* Leave some instructions for "slop". */
8711 if (delta >= -260000 && delta < 260000)
8712 veryfar = 0;
8715 if (veryfar)
8716 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8717 else
8718 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8720 else
8722 if (emit_nop)
8723 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8724 else
8725 tmpl = "c%cb%s\t%%1, %%2, %%3";
8728 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8730 return string;
8733 /* Return the string to output a conditional branch to LABEL, testing
8734 register REG. LABEL is the operand number of the label; REG is the
8735 operand number of the reg. OP is the conditional expression. The mode
8736 of REG says what kind of comparison we made.
8738 DEST is the destination insn (i.e. the label), INSN is the source.
8740 REVERSED is nonzero if we should reverse the sense of the comparison.
8742 ANNUL is nonzero if we should generate an annulling branch. */
8744 const char *
8745 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8746 int annul, rtx_insn *insn)
8748 static char string[64];
8749 enum rtx_code code = GET_CODE (op);
8750 machine_mode mode = GET_MODE (XEXP (op, 0));
8751 rtx note;
8752 int far;
8753 char *p;
8755 /* Branch-on-register instructions are limited to +-128KB. If it is too far away,
8756 change
8758 brnz,pt %g1, .LC30
8762 brz,pn %g1, .+12
8764 ba,pt %xcc, .LC30
8768 brgez,a,pn %o1, .LC29
8772 brlz,pt %o1, .+16
8774 ba,pt %xcc, .LC29 */
8776 far = get_attr_length (insn) >= 3;
8778 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8779 if (reversed ^ far)
8780 code = reverse_condition (code);
8782 /* Only 64-bit versions of these instructions exist. */
8783 gcc_assert (mode == DImode);
8785 /* Start by writing the branch condition. */
8787 switch (code)
8789 case NE:
8790 strcpy (string, "brnz");
8791 break;
8793 case EQ:
8794 strcpy (string, "brz");
8795 break;
8797 case GE:
8798 strcpy (string, "brgez");
8799 break;
8801 case LT:
8802 strcpy (string, "brlz");
8803 break;
8805 case LE:
8806 strcpy (string, "brlez");
8807 break;
8809 case GT:
8810 strcpy (string, "brgz");
8811 break;
8813 default:
8814 gcc_unreachable ();
8817 p = strchr (string, '\0');
8819 /* Now add the annulling, reg, label, and nop. */
8820 if (annul && ! far)
8822 strcpy (p, ",a");
8823 p += 2;
8826 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8828 strcpy (p,
8829 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8830 >= profile_probability::even ()) ^ far)
8831 ? ",pt" : ",pn");
8832 p += 3;
8835 *p = p < string + 8 ? '\t' : ' ';
8836 p++;
8837 *p++ = '%';
8838 *p++ = '0' + reg;
8839 *p++ = ',';
8840 *p++ = ' ';
8841 if (far)
8843 int veryfar = 1, delta;
8845 if (INSN_ADDRESSES_SET_P ())
8847 delta = (INSN_ADDRESSES (INSN_UID (dest))
8848 - INSN_ADDRESSES (INSN_UID (insn)));
8849 /* Leave some instructions for "slop". */
8850 if (delta >= -260000 && delta < 260000)
8851 veryfar = 0;
8854 strcpy (p, ".+12\n\t nop\n\t");
8855 /* Skip the next insn if requested or
8856 if we know that it will be a nop. */
8857 if (annul || ! final_sequence)
8858 p[3] = '6';
8859 p += 12;
8860 if (veryfar)
8862 strcpy (p, "b\t");
8863 p += 2;
8865 else
8867 strcpy (p, "ba,pt\t%%xcc, ");
8868 p += 13;
8871 *p++ = '%';
8872 *p++ = 'l';
8873 *p++ = '0' + label;
8874 *p++ = '%';
8875 *p++ = '#';
8876 *p = '\0';
8878 return string;
8881 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8882 Such instructions cannot be used in the delay slot of a return insn on V9.
8883 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts. */
8886 static int
8887 epilogue_renumber (rtx *where, int test)
8889 const char *fmt;
8890 int i;
8891 enum rtx_code code;
8893 if (*where == 0)
8894 return 0;
8896 code = GET_CODE (*where);
8898 switch (code)
8900 case REG:
8901 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8902 return 1;
8903 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8904 *where = gen_rtx_REG (GET_MODE (*where), OUTGOING_REGNO (REGNO(*where)));
8905 /* fallthrough */
8906 case SCRATCH:
8907 case CC0:
8908 case PC:
8909 case CONST_INT:
8910 case CONST_WIDE_INT:
8911 case CONST_DOUBLE:
8912 return 0;
8914 /* Do not replace the frame pointer with the stack pointer because
8915 it can cause the delayed instruction to load below the stack.
8916 This occurs when instructions like:
8918 (set (reg/i:SI 24 %i0)
8919 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8920 (const_int -20 [0xffffffec])) 0))
8922 are in the return delayed slot. */
8923 case PLUS:
8924 if (GET_CODE (XEXP (*where, 0)) == REG
8925 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8926 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8927 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8928 return 1;
8929 break;
8931 case MEM:
8932 if (SPARC_STACK_BIAS
8933 && GET_CODE (XEXP (*where, 0)) == REG
8934 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8935 return 1;
8936 break;
8938 default:
8939 break;
8942 fmt = GET_RTX_FORMAT (code);
8944 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8946 if (fmt[i] == 'E')
8948 int j;
8949 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8950 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8951 return 1;
8953 else if (fmt[i] == 'e'
8954 && epilogue_renumber (&(XEXP (*where, i)), test))
8955 return 1;
8957 return 0;
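/* For example (a sketch): a delay-slot insn such as
   (set (reg:SI 24 %i0) (reg:SI 16 %l0)) is rejected because of %l0,
   while (set (reg:SI 24 %i0) (const_int 0)) is accepted and, with
   TEST == 0, rewritten to target %o0, since the register window has
   already shifted when the delay insn of the return executes.  */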
8960 /* Leaf functions and non-leaf functions have different needs. */
8962 static const int
8963 reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8965 static const int
8966 reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8968 static const int *const reg_alloc_orders[] = {
8969 reg_leaf_alloc_order,
8970 reg_nonleaf_alloc_order};
8972 void
8973 order_regs_for_local_alloc (void)
8975 static int last_order_nonleaf = 1;
8977 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8979 last_order_nonleaf = !last_order_nonleaf;
8980 memcpy ((char *) reg_alloc_order,
8981 (const char *) reg_alloc_orders[last_order_nonleaf],
8982 FIRST_PSEUDO_REGISTER * sizeof (int));
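/* Hard register 15 is %o7, the register that receives the return address
   of a call, so df_regs_ever_live_p (15) is effectively a "this function
   makes calls and hence is not a leaf" test.  */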
8986 /* Return 1 if REG and MEM are legitimate enough to allow the various
8987 MEM<-->REG splits to be run. */
8989 int
8990 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
8992 /* Punt if we are here by mistake. */
8993 gcc_assert (reload_completed);
8995 /* We must have an offsettable memory reference. */
8996 if (!offsettable_memref_p (mem))
8997 return 0;
8999 /* If we have legitimate args for ldd/std, we do not want
9000 the split to happen. */
9001 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9002 return 0;
9004 /* Success. */
9005 return 1;
9008 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9010 void
9011 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9013 rtx high_part = gen_highpart (mode, dest);
9014 rtx low_part = gen_lowpart (mode, dest);
9015 rtx word0 = adjust_address (src, mode, 0);
9016 rtx word1 = adjust_address (src, mode, 4);
9018 if (reg_overlap_mentioned_p (high_part, word1))
9020 emit_move_insn_1 (low_part, word1);
9021 emit_move_insn_1 (high_part, word0);
9023 else
9025 emit_move_insn_1 (high_part, word0);
9026 emit_move_insn_1 (low_part, word1);
9030 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9032 void
9033 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9035 rtx word0 = adjust_address (dest, mode, 0);
9036 rtx word1 = adjust_address (dest, mode, 4);
9037 rtx high_part = gen_highpart (mode, src);
9038 rtx low_part = gen_lowpart (mode, src);
9040 emit_move_insn_1 (word0, high_part);
9041 emit_move_insn_1 (word1, low_part);
9044 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9046 int
9047 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9049 /* Punt if we are here by mistake. */
9050 gcc_assert (reload_completed);
9052 if (GET_CODE (reg1) == SUBREG)
9053 reg1 = SUBREG_REG (reg1);
9054 if (GET_CODE (reg1) != REG)
9055 return 0;
9056 const int regno1 = REGNO (reg1);
9058 if (GET_CODE (reg2) == SUBREG)
9059 reg2 = SUBREG_REG (reg2);
9060 if (GET_CODE (reg2) != REG)
9061 return 0;
9062 const int regno2 = REGNO (reg2);
9064 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9065 return 1;
9067 if (TARGET_VIS3)
9069 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9070 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9071 return 1;
9074 return 0;
9077 /* Split a REG <--> REG move into a pair of moves in MODE. */
9079 void
9080 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9082 rtx dest1 = gen_highpart (mode, dest);
9083 rtx dest2 = gen_lowpart (mode, dest);
9084 rtx src1 = gen_highpart (mode, src);
9085 rtx src2 = gen_lowpart (mode, src);
9087 /* Now emit using the real source and destination we found, swapping
9088 the order if we detect overlap. */
9089 if (reg_overlap_mentioned_p (dest1, src2))
9091 emit_move_insn_1 (dest2, src2);
9092 emit_move_insn_1 (dest1, src1);
9094 else
9096 emit_move_insn_1 (dest1, src1);
9097 emit_move_insn_1 (dest2, src2);
9101 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9102 This makes them candidates for using ldd and std insns.
9104 Note reg1 and reg2 *must* be hard registers. */
9106 int
9107 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9109 /* We might have been passed a SUBREG. */
9110 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9111 return 0;
9113 if (REGNO (reg1) % 2 != 0)
9114 return 0;
9116 /* Integer ldd is deprecated in SPARC V9 */
9117 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9118 return 0;
9120 return (REGNO (reg1) == REGNO (reg2) - 1);
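/* E.g. %o0/%o1 (hard registers 8 and 9) qualify on V8, whereas %o1/%o2
   do not because 9 is odd; on V9 only FP pairs such as %f0/%f1 remain
   acceptable since integer ldd is deprecated there.  */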
9123 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9124 an ldd or std insn.
9126 This can only happen when addr1 and addr2, the addresses in mem1
9127 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9128 addr1 must also be aligned on a 64-bit boundary.
9130 Also, if dependent_reg_rtx is not null, it should not be used to
9131 compute the address for mem1, i.e. we cannot optimize a sequence
9132 like:
9133 ld [%o0], %o0
9134 ld [%o0 + 4], %o1
9135 to
9136 ldd [%o0], %o0
9137 nor:
9138 ld [%g3 + 4], %g3
9139 ld [%g3], %g2
9140 to
9141 ldd [%g3], %g2
9143 But, note that the transformation from:
9144 ld [%g2 + 4], %g3
9145 ld [%g2], %g2
9146 to
9147 ldd [%g2], %g2
9148 is perfectly fine. Thus, the peephole2 patterns always pass us
9149 the destination register of the first load, never the second one.
9151 For stores we don't have a similar problem, so dependent_reg_rtx is
9152 NULL_RTX. */
9154 int
9155 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9157 rtx addr1, addr2;
9158 unsigned int reg1;
9159 HOST_WIDE_INT offset1;
9161 /* The mems cannot be volatile. */
9162 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9163 return 0;
9165 /* MEM1 should be aligned on a 64-bit boundary. */
9166 if (MEM_ALIGN (mem1) < 64)
9167 return 0;
9169 addr1 = XEXP (mem1, 0);
9170 addr2 = XEXP (mem2, 0);
9172 /* Extract a register number and offset (if used) from the first addr. */
9173 if (GET_CODE (addr1) == PLUS)
9175 /* If not a REG, return zero. */
9176 if (GET_CODE (XEXP (addr1, 0)) != REG)
9177 return 0;
9178 else
9180 reg1 = REGNO (XEXP (addr1, 0));
9181 /* The offset must be constant! */
9182 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9183 return 0;
9184 offset1 = INTVAL (XEXP (addr1, 1));
9187 else if (GET_CODE (addr1) != REG)
9188 return 0;
9189 else
9191 reg1 = REGNO (addr1);
9192 /* This was a simple (mem (reg)) expression. Offset is 0. */
9193 offset1 = 0;
9196 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9197 if (GET_CODE (addr2) != PLUS)
9198 return 0;
9200 if (GET_CODE (XEXP (addr2, 0)) != REG
9201 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9202 return 0;
9204 if (reg1 != REGNO (XEXP (addr2, 0)))
9205 return 0;
9207 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9208 return 0;
9210 /* The first offset must be evenly divisible by 8 to ensure the
9211 address is 64-bit aligned. */
9212 if (offset1 % 8 != 0)
9213 return 0;
9215 /* The offset for the second addr must be 4 more than the first addr. */
9216 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9217 return 0;
9219 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9220 instructions. */
9221 return 1;
9224 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9226 rtx
9227 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9229 rtx x = widen_memory_access (mem1, mode, 0);
9230 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9231 return x;
9234 /* Return 1 if reg is a pseudo, or is the first register in
9235 a hard register pair. This makes it suitable for use in
9236 ldd and std insns. */
9238 int
9239 register_ok_for_ldd (rtx reg)
9241 /* We might have been passed a SUBREG. */
9242 if (!REG_P (reg))
9243 return 0;
9245 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9246 return (REGNO (reg) % 2 == 0);
9248 return 1;
9251 /* Return 1 if OP, a MEM, has an address which is known to be
9252 aligned to an 8-byte boundary. */
9254 int
9255 memory_ok_for_ldd (rtx op)
9257 /* In 64-bit mode, we assume that the address is word-aligned. */
9258 if (TARGET_ARCH32 && !mem_min_alignment (op, 8))
9259 return 0;
9261 if (! can_create_pseudo_p ()
9262 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9263 return 0;
9265 return 1;
9268 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9270 static bool
9271 sparc_print_operand_punct_valid_p (unsigned char code)
9273 if (code == '#'
9274 || code == '*'
9275 || code == '('
9276 || code == ')'
9277 || code == '_'
9278 || code == '&')
9279 return true;
9281 return false;
9284 /* Implement TARGET_PRINT_OPERAND.
9285 Print operand X (an rtx) in assembler syntax to file FILE.
9286 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9287 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9289 static void
9290 sparc_print_operand (FILE *file, rtx x, int code)
9292 const char *s;
9294 switch (code)
9296 case '#':
9297 /* Output an insn in a delay slot. */
9298 if (final_sequence)
9299 sparc_indent_opcode = 1;
9300 else
9301 fputs ("\n\t nop", file);
9302 return;
9303 case '*':
9304 /* Output an annul flag if there's nothing for the delay slot and we
9305 are optimizing. This is always used with '(' below.
9306 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9307 this is a dbx bug. So, we only do this when optimizing.
9308 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9309 Always emit a nop in case the next instruction is a branch. */
9310 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9311 fputs (",a", file);
9312 return;
9313 case '(':
9314 /* Output a 'nop' if there's nothing for the delay slot and we are
9315 not optimizing. This is always used with '*' above. */
9316 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9317 fputs ("\n\t nop", file);
9318 else if (final_sequence)
9319 sparc_indent_opcode = 1;
9320 return;
9321 case ')':
9322 /* Output the right displacement from the saved PC on function return.
9323 The caller may have placed an "unimp" insn immediately after the call
9324 so we have to account for it. This insn is used in the 32-bit ABI
9325 when calling a function that returns a non zero-sized structure. The
9326 64-bit ABI doesn't have it. Be careful to have this test be the same
9327 as that for the call. The exception is when sparc_std_struct_return
9328 is enabled, the psABI is followed exactly and the adjustment is made
9329 by the code in sparc_struct_value_rtx. The call emitted is the same
9330 when sparc_std_struct_return is enabled. */
9331 if (!TARGET_ARCH64
9332 && cfun->returns_struct
9333 && !sparc_std_struct_return
9334 && DECL_SIZE (DECL_RESULT (current_function_decl))
9335 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9336 == INTEGER_CST
9337 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9338 fputs ("12", file);
9339 else
9340 fputc ('8', file);
9341 return;
9342 case '_':
9343 /* Output the Embedded Medium/Anywhere code model base register. */
9344 fputs (EMBMEDANY_BASE_REG, file);
9345 return;
9346 case '&':
9347 /* Print some local dynamic TLS name. */
9348 if (const char *name = get_some_local_dynamic_name ())
9349 assemble_name (file, name);
9350 else
9351 output_operand_lossage ("'%%&' used without any "
9352 "local dynamic TLS references");
9353 return;
9355 case 'Y':
9356 /* Adjust the operand to take into account a RESTORE operation. */
9357 if (GET_CODE (x) == CONST_INT)
9358 break;
9359 else if (GET_CODE (x) != REG)
9360 output_operand_lossage ("invalid %%Y operand");
9361 else if (REGNO (x) < 8)
9362 fputs (reg_names[REGNO (x)], file);
9363 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9364 fputs (reg_names[REGNO (x)-16], file);
9365 else
9366 output_operand_lossage ("invalid %%Y operand");
9367 return;
9368 case 'L':
9369 /* Print out the low order register name of a register pair. */
9370 if (WORDS_BIG_ENDIAN)
9371 fputs (reg_names[REGNO (x)+1], file);
9372 else
9373 fputs (reg_names[REGNO (x)], file);
9374 return;
9375 case 'H':
9376 /* Print out the high order register name of a register pair. */
9377 if (WORDS_BIG_ENDIAN)
9378 fputs (reg_names[REGNO (x)], file);
9379 else
9380 fputs (reg_names[REGNO (x)+1], file);
9381 return;
9382 case 'R':
9383 /* Print out the second register name of a register pair or quad.
9384 I.e., R (%o0) => %o1. */
9385 fputs (reg_names[REGNO (x)+1], file);
9386 return;
9387 case 'S':
9388 /* Print out the third register name of a register quad.
9389 I.e., S (%o0) => %o2. */
9390 fputs (reg_names[REGNO (x)+2], file);
9391 return;
9392 case 'T':
9393 /* Print out the fourth register name of a register quad.
9394 I.e., T (%o0) => %o3. */
9395 fputs (reg_names[REGNO (x)+3], file);
9396 return;
9397 case 'x':
9398 /* Print a condition code register. */
9399 if (REGNO (x) == SPARC_ICC_REG)
9401 switch (GET_MODE (x))
9403 case E_CCmode:
9404 case E_CCNZmode:
9405 case E_CCCmode:
9406 case E_CCVmode:
9407 s = "%icc";
9408 break;
9409 case E_CCXmode:
9410 case E_CCXNZmode:
9411 case E_CCXCmode:
9412 case E_CCXVmode:
9413 s = "%xcc";
9414 break;
9415 default:
9416 gcc_unreachable ();
9418 fputs (s, file);
9420 else
9421 /* %fccN register */
9422 fputs (reg_names[REGNO (x)], file);
9423 return;
9424 case 'm':
9425 /* Print the operand's address only. */
9426 output_address (GET_MODE (x), XEXP (x, 0));
9427 return;
9428 case 'r':
9429 /* In this case we need a register. Use %g0 if the
9430 operand is const0_rtx. */
9431 if (x == const0_rtx
9432 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9434 fputs ("%g0", file);
9435 return;
9437 else
9438 break;
9440 case 'A':
9441 switch (GET_CODE (x))
9443 case IOR:
9444 s = "or";
9445 break;
9446 case AND:
9447 s = "and";
9448 break;
9449 case XOR:
9450 s = "xor";
9451 break;
9452 default:
9453 output_operand_lossage ("invalid %%A operand");
9454 s = "";
9455 break;
9457 fputs (s, file);
9458 return;
9460 case 'B':
9461 switch (GET_CODE (x))
9463 case IOR:
9464 s = "orn";
9465 break;
9466 case AND:
9467 s = "andn";
9468 break;
9469 case XOR:
9470 s = "xnor";
9471 break;
9472 default:
9473 output_operand_lossage ("invalid %%B operand");
9474 s = "";
9475 break;
9477 fputs (s, file);
9478 return;
9480 /* This is used by the conditional move instructions. */
9481 case 'C':
9483 machine_mode mode = GET_MODE (XEXP (x, 0));
9484 switch (GET_CODE (x))
9486 case NE:
9487 if (mode == CCVmode || mode == CCXVmode)
9488 s = "vs";
9489 else
9490 s = "ne";
9491 break;
9492 case EQ:
9493 if (mode == CCVmode || mode == CCXVmode)
9494 s = "vc";
9495 else
9496 s = "e";
9497 break;
9498 case GE:
9499 if (mode == CCNZmode || mode == CCXNZmode)
9500 s = "pos";
9501 else
9502 s = "ge";
9503 break;
9504 case GT:
9505 s = "g";
9506 break;
9507 case LE:
9508 s = "le";
9509 break;
9510 case LT:
9511 if (mode == CCNZmode || mode == CCXNZmode)
9512 s = "neg";
9513 else
9514 s = "l";
9515 break;
9516 case GEU:
9517 s = "geu";
9518 break;
9519 case GTU:
9520 s = "gu";
9521 break;
9522 case LEU:
9523 s = "leu";
9524 break;
9525 case LTU:
9526 s = "lu";
9527 break;
9528 case LTGT:
9529 s = "lg";
9530 break;
9531 case UNORDERED:
9532 s = "u";
9533 break;
9534 case ORDERED:
9535 s = "o";
9536 break;
9537 case UNLT:
9538 s = "ul";
9539 break;
9540 case UNLE:
9541 s = "ule";
9542 break;
9543 case UNGT:
9544 s = "ug";
9545 break;
9546 case UNGE:
9547 s = "uge"
9548 ; break;
9549 case UNEQ:
9550 s = "ue";
9551 break;
9552 default:
9553 output_operand_lossage ("invalid %%C operand");
9554 s = "";
9555 break;
9557 fputs (s, file);
9558 return;
9561 /* These are used by the movr instruction pattern. */
9562 case 'D':
9564 switch (GET_CODE (x))
9566 case NE:
9567 s = "ne";
9568 break;
9569 case EQ:
9570 s = "e";
9571 break;
9572 case GE:
9573 s = "gez";
9574 break;
9575 case LT:
9576 s = "lz";
9577 break;
9578 case LE:
9579 s = "lez";
9580 break;
9581 case GT:
9582 s = "gz";
9583 break;
9584 default:
9585 output_operand_lossage ("invalid %%D operand");
9586 s = "";
9587 break;
9589 fputs (s, file);
9590 return;
9593 case 'b':
9595 /* Print a sign-extended character. */
9596 int i = trunc_int_for_mode (INTVAL (x), QImode);
9597 fprintf (file, "%d", i);
9598 return;
9601 case 'f':
9602 /* Operand must be a MEM; write its address. */
9603 if (GET_CODE (x) != MEM)
9604 output_operand_lossage ("invalid %%f operand");
9605 output_address (GET_MODE (x), XEXP (x, 0));
9606 return;
9608 case 's':
9610 /* Print a sign-extended 32-bit value. */
9611 HOST_WIDE_INT i;
9612 if (GET_CODE(x) == CONST_INT)
9613 i = INTVAL (x);
9614 else
9616 output_operand_lossage ("invalid %%s operand");
9617 return;
9619 i = trunc_int_for_mode (i, SImode);
9620 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9621 return;
9624 case 0:
9625 /* Do nothing special. */
9626 break;
9628 default:
9629 /* Undocumented flag. */
9630 output_operand_lossage ("invalid operand output code");
9633 if (GET_CODE (x) == REG)
9634 fputs (reg_names[REGNO (x)], file);
9635 else if (GET_CODE (x) == MEM)
9637 fputc ('[', file);
9638 /* Poor Sun assembler doesn't understand absolute addressing. */
9639 if (CONSTANT_P (XEXP (x, 0)))
9640 fputs ("%g0+", file);
9641 output_address (GET_MODE (x), XEXP (x, 0));
9642 fputc (']', file);
9644 else if (GET_CODE (x) == HIGH)
9646 fputs ("%hi(", file);
9647 output_addr_const (file, XEXP (x, 0));
9648 fputc (')', file);
9650 else if (GET_CODE (x) == LO_SUM)
9652 sparc_print_operand (file, XEXP (x, 0), 0);
9653 if (TARGET_CM_MEDMID)
9654 fputs ("+%l44(", file);
9655 else
9656 fputs ("+%lo(", file);
9657 output_addr_const (file, XEXP (x, 1));
9658 fputc (')', file);
9660 else if (GET_CODE (x) == CONST_DOUBLE)
9661 output_operand_lossage ("floating-point constant not a valid immediate operand");
9662 else
9663 output_addr_const (file, x);
9666 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9668 static void
9669 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9671 rtx base, index = 0;
9672 int offset = 0;
9673 rtx addr = x;
9675 if (REG_P (addr))
9676 fputs (reg_names[REGNO (addr)], file);
9677 else if (GET_CODE (addr) == PLUS)
9679 if (CONST_INT_P (XEXP (addr, 0)))
9680 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9681 else if (CONST_INT_P (XEXP (addr, 1)))
9682 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9683 else
9684 base = XEXP (addr, 0), index = XEXP (addr, 1);
9685 if (GET_CODE (base) == LO_SUM)
9687 gcc_assert (USE_AS_OFFSETABLE_LO10
9688 && TARGET_ARCH64
9689 && ! TARGET_CM_MEDMID);
9690 output_operand (XEXP (base, 0), 0);
9691 fputs ("+%lo(", file);
9692 output_address (VOIDmode, XEXP (base, 1));
9693 fprintf (file, ")+%d", offset);
9695 else
9697 fputs (reg_names[REGNO (base)], file);
9698 if (index == 0)
9699 fprintf (file, "%+d", offset);
9700 else if (REG_P (index))
9701 fprintf (file, "+%s", reg_names[REGNO (index)]);
9702 else if (GET_CODE (index) == SYMBOL_REF
9703 || GET_CODE (index) == LABEL_REF
9704 || GET_CODE (index) == CONST)
9705 fputc ('+', file), output_addr_const (file, index);
9706 else gcc_unreachable ();
9709 else if (GET_CODE (addr) == MINUS
9710 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9712 output_addr_const (file, XEXP (addr, 0));
9713 fputs ("-(", file);
9714 output_addr_const (file, XEXP (addr, 1));
9715 fputs ("-.)", file);
9717 else if (GET_CODE (addr) == LO_SUM)
9719 output_operand (XEXP (addr, 0), 0);
9720 if (TARGET_CM_MEDMID)
9721 fputs ("+%l44(", file);
9722 else
9723 fputs ("+%lo(", file);
9724 output_address (VOIDmode, XEXP (addr, 1));
9725 fputc (')', file);
9727 else if (flag_pic
9728 && GET_CODE (addr) == CONST
9729 && GET_CODE (XEXP (addr, 0)) == MINUS
9730 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9731 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9732 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9734 addr = XEXP (addr, 0);
9735 output_addr_const (file, XEXP (addr, 0));
9736 /* Group the args of the second CONST in parenthesis. */
9737 fputs ("-(", file);
9738 /* Skip past the second CONST--it does nothing for us. */
9739 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9740 /* Close the parenthesis. */
9741 fputc (')', file);
9743 else
9745 output_addr_const (file, addr);
9749 /* Target hook for assembling integer objects. The sparc version has
9750 special handling for aligned DI-mode objects. */
9752 static bool
9753 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9755 /* ??? We only output .xword's for symbols and only then in environments
9756 where the assembler can handle them. */
9757 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9759 if (TARGET_V9)
9761 assemble_integer_with_op ("\t.xword\t", x);
9762 return true;
9764 else
9766 assemble_aligned_integer (4, const0_rtx);
9767 assemble_aligned_integer (4, x);
9768 return true;
9771 return default_assemble_integer (x, size, aligned_p);
9774 /* Return the value of a code used in the .proc pseudo-op that says
9775 what kind of result this function returns. For non-C types, we pick
9776 the closest C type. */
9778 #ifndef SHORT_TYPE_SIZE
9779 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9780 #endif
9782 #ifndef INT_TYPE_SIZE
9783 #define INT_TYPE_SIZE BITS_PER_WORD
9784 #endif
9786 #ifndef LONG_TYPE_SIZE
9787 #define LONG_TYPE_SIZE BITS_PER_WORD
9788 #endif
9790 #ifndef LONG_LONG_TYPE_SIZE
9791 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9792 #endif
9794 #ifndef FLOAT_TYPE_SIZE
9795 #define FLOAT_TYPE_SIZE BITS_PER_WORD
9796 #endif
9798 #ifndef DOUBLE_TYPE_SIZE
9799 #define DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9800 #endif
9802 #ifndef LONG_DOUBLE_TYPE_SIZE
9803 #define LONG_DOUBLE_TYPE_SIZE (BITS_PER_WORD * 2)
9804 #endif
9806 unsigned long
9807 sparc_type_code (tree type)
9809 unsigned long qualifiers = 0;
9810 unsigned shift;
9812 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9813 setting more, since some assemblers will give an error for this. Also,
9814 we must be careful to avoid shifts of 32 bits or more to avoid getting
9815 unpredictable results. */
9817 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9819 switch (TREE_CODE (type))
9821 case ERROR_MARK:
9822 return qualifiers;
9824 case ARRAY_TYPE:
9825 qualifiers |= (3 << shift);
9826 break;
9828 case FUNCTION_TYPE:
9829 case METHOD_TYPE:
9830 qualifiers |= (2 << shift);
9831 break;
9833 case POINTER_TYPE:
9834 case REFERENCE_TYPE:
9835 case OFFSET_TYPE:
9836 qualifiers |= (1 << shift);
9837 break;
9839 case RECORD_TYPE:
9840 return (qualifiers | 8);
9842 case UNION_TYPE:
9843 case QUAL_UNION_TYPE:
9844 return (qualifiers | 9);
9846 case ENUMERAL_TYPE:
9847 return (qualifiers | 10);
9849 case VOID_TYPE:
9850 return (qualifiers | 16);
9852 case INTEGER_TYPE:
9853 /* If this is a range type, consider it to be the underlying
9854 type. */
9855 if (TREE_TYPE (type) != 0)
9856 break;
9858 /* Carefully distinguish all the standard types of C,
9859 without messing up if the language is not C. We do this by
9860 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9861 look at both the names and the above fields, but that's redundant.
9862 Any type whose size is between two C types will be considered
9863 to be the wider of the two types. Also, we do not have a
9864 special code to use for "long long", so anything wider than
9865 long is treated the same. Note that we can't distinguish
9866 between "int" and "long" in this code if they are the same
9867 size, but that's fine, since neither can the assembler. */
9869 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9870 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9872 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9873 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9875 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9876 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9878 else
9879 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9881 case REAL_TYPE:
9882 /* If this is a range type, consider it to be the underlying
9883 type. */
9884 if (TREE_TYPE (type) != 0)
9885 break;
9887 /* Carefully distinguish all the standard types of C,
9888 without messing up if the language is not C. */
9890 if (TYPE_PRECISION (type) == FLOAT_TYPE_SIZE)
9891 return (qualifiers | 6);
9893 else
9894 return (qualifiers | 7);
9896 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9897 /* ??? We need to distinguish between double and float complex types,
9898 but I don't know how yet because I can't reach this code from
9899 existing front-ends. */
9900 return (qualifiers | 7); /* Who knows? */
9902 case VECTOR_TYPE:
9903 case BOOLEAN_TYPE: /* Boolean truth value type. */
9904 case LANG_TYPE:
9905 case NULLPTR_TYPE:
9906 return qualifiers;
9908 default:
9909 gcc_unreachable (); /* Not a type! */
9913 return qualifiers;
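/* A worked example of the encoding above (illustrative only): for a
   hypothetical function returning "int **", the two POINTER_TYPE levels
   fill the 2-bit slots starting at bit 6 and the final INTEGER_TYPE
   supplies the low-order code, giving

     (1 << 6) | (1 << 8) | 4  =  0x144

   while a plain "unsigned int" yields just 14 and a function returning
   a struct directly yields 8, per the cases above.  */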
9916 /* Nested function support. */
9918 /* Emit RTL insns to initialize the variable parts of a trampoline.
9919 FNADDR is an RTX for the address of the function's pure code.
9920 CXT is an RTX for the static chain value for the function.
9922 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9923 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9924 (to store insns). This is a bit excessive. Perhaps a different
9925 mechanism would be better here.
9927 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9929 static void
9930 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9932 /* SPARC 32-bit trampoline:
9934 sethi %hi(fn), %g1
9935 sethi %hi(static), %g2
9936 jmp %g1+%lo(fn)
9937 or %g2, %lo(static), %g2
9939 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9940 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9943 emit_move_insn
9944 (adjust_address (m_tramp, SImode, 0),
9945 expand_binop (SImode, ior_optab,
9946 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9947 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9948 NULL_RTX, 1, OPTAB_DIRECT));
9950 emit_move_insn
9951 (adjust_address (m_tramp, SImode, 4),
9952 expand_binop (SImode, ior_optab,
9953 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9954 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9955 NULL_RTX, 1, OPTAB_DIRECT));
9957 emit_move_insn
9958 (adjust_address (m_tramp, SImode, 8),
9959 expand_binop (SImode, ior_optab,
9960 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9961 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9962 NULL_RTX, 1, OPTAB_DIRECT));
9964 emit_move_insn
9965 (adjust_address (m_tramp, SImode, 12),
9966 expand_binop (SImode, ior_optab,
9967 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9968 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9969 NULL_RTX, 1, OPTAB_DIRECT));
9971 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
9972 aligned on a 16 byte boundary so one flush clears it all. */
9973 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 0))));
9974 if (sparc_cpu != PROCESSOR_ULTRASPARC
9975 && sparc_cpu != PROCESSOR_ULTRASPARC3
9976 && sparc_cpu != PROCESSOR_NIAGARA
9977 && sparc_cpu != PROCESSOR_NIAGARA2
9978 && sparc_cpu != PROCESSOR_NIAGARA3
9979 && sparc_cpu != PROCESSOR_NIAGARA4
9980 && sparc_cpu != PROCESSOR_NIAGARA7
9981 && sparc_cpu != PROCESSOR_M8)
9982 emit_insn (gen_flushsi (validize_mem (adjust_address (m_tramp, SImode, 8))));
9984 /* Call __enable_execute_stack after writing onto the stack to make sure
9985 the stack address is accessible. */
9986 #ifdef HAVE_ENABLE_EXECUTE_STACK
9987 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
9988 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
9989 #endif
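/* A host-side sketch of the word construction performed above, assuming
   32-bit unsigned addresses FN and CXT; it only makes the opcode
   arithmetic visible and is not a substitute for the RTL expansion:

     word0 = 0x03000000 | (FN  >> 10);     sethi %hi(fn), %g1
     word1 = 0x05000000 | (CXT >> 10);     sethi %hi(static), %g2
     word2 = 0x81c06000 | (FN  & 0x3ff);   jmp   %g1+%lo(fn)
     word3 = 0x8410a000 | (CXT & 0x3ff);   or    %g2, %lo(static), %g2
*/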
9993 /* The 64-bit version is simpler because it makes more sense to load the
9994 values as "immediate" data out of the trampoline. It's also easier since
9995 we can read the PC without clobbering a register. */
9997 static void
9998 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10000 /* SPARC 64-bit trampoline:
10002 rd %pc, %g1
10003 ldx [%g1+24], %g5
10004 jmp %g5
10005 ldx [%g1+16], %g5
10006 +16 bytes data
10009 emit_move_insn (adjust_address (m_tramp, SImode, 0),
10010 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10011 emit_move_insn (adjust_address (m_tramp, SImode, 4),
10012 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10013 emit_move_insn (adjust_address (m_tramp, SImode, 8),
10014 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10015 emit_move_insn (adjust_address (m_tramp, SImode, 12),
10016 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10017 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10018 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10019 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 0))));
10021 if (sparc_cpu != PROCESSOR_ULTRASPARC
10022 && sparc_cpu != PROCESSOR_ULTRASPARC3
10023 && sparc_cpu != PROCESSOR_NIAGARA
10024 && sparc_cpu != PROCESSOR_NIAGARA2
10025 && sparc_cpu != PROCESSOR_NIAGARA3
10026 && sparc_cpu != PROCESSOR_NIAGARA4
10027 && sparc_cpu != PROCESSOR_NIAGARA7
10028 && sparc_cpu != PROCESSOR_M8)
10029 emit_insn (gen_flushdi (validize_mem (adjust_address (m_tramp, DImode, 8))));
10031 /* Call __enable_execute_stack after writing onto the stack to make sure
10032 the stack address is accessible. */
10033 #ifdef HAVE_ENABLE_EXECUTE_STACK
10034 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10035 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10036 #endif
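/* Layout sketch of the resulting 32-byte trampoline as written above
   (byte offsets on the left):

      0: 0x83414000   rd  %pc, %g1
      4: 0xca586018   ldx [%g1+24], %g5
      8: 0x81c14000   jmp %g5
     12: 0xca586010   ldx [%g1+16], %g5
     16: CXT          static chain value
     24: FNADDR       target function address
*/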
10039 /* Worker for TARGET_TRAMPOLINE_INIT. */
10041 static void
10042 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10044 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10045 cxt = force_reg (Pmode, cxt);
10046 if (TARGET_ARCH64)
10047 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10048 else
10049 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10052 /* Adjust the cost of a scheduling dependency. Return the new cost of
10053 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10055 static int
10056 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10057 int cost)
10059 enum attr_type insn_type;
10061 if (recog_memoized (insn) < 0)
10062 return cost;
10064 insn_type = get_attr_type (insn);
10066 if (dep_type == 0)
10068 /* Data dependency; DEP_INSN writes a register that INSN reads some
10069 cycles later. */
10071 /* If a load, then the dependence must be on the memory address;
10072 add an extra "cycle". Note that the cost could be two cycles
10073 if the reg was written late in an instruction group; we cannot
10074 tell here. */
10075 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10076 return cost + 3;
10078 /* Get the delay only if the address of the store is the dependence. */
10079 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10081 rtx pat = PATTERN (insn);
10082 rtx dep_pat = PATTERN (dep_insn);
10084 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10085 return cost; /* This should not happen! */
10087 /* The dependency between the two instructions was on the data that
10088 is being stored. Assume that this implies that the address of the
10089 store is not dependent. */
10090 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10091 return cost;
10093 return cost + 3; /* An approximation. */
10096 /* A shift instruction cannot receive its data from an instruction
10097 in the same cycle; add a one cycle penalty. */
10098 if (insn_type == TYPE_SHIFT)
10099 return cost + 3; /* Split before cascade into shift. */
10101 else
10103 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10104 INSN writes some cycles later. */
10106 /* These are only significant for the fpu unit; writing a fp reg before
10107 the fpu has finished with it stalls the processor. */
10109 /* Reusing an integer register causes no problems. */
10110 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10111 return 0;
10114 return cost;
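/* Illustrative example of the store heuristic above:

     r1 = r2 + r3        <- DEP_INSN
     mem[r1] = r4        <- INSN (TYPE_STORE)

   Here the dependency is on the address r1, so the extra penalty is
   added; if DEP_INSN instead computed the stored data r4 (SET_DEST of
   DEP_INSN equal to SET_SRC of INSN), the cost is left unchanged.  */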
10117 static int
10118 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10119 int cost)
10121 enum attr_type insn_type, dep_type;
10122 rtx pat = PATTERN (insn);
10123 rtx dep_pat = PATTERN (dep_insn);
10125 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10126 return cost;
10128 insn_type = get_attr_type (insn);
10129 dep_type = get_attr_type (dep_insn);
10131 switch (dtype)
10133 case 0:
10134 /* Data dependency; DEP_INSN writes a register that INSN reads some
10135 cycles later. */
10137 switch (insn_type)
10139 case TYPE_STORE:
10140 case TYPE_FPSTORE:
10141 /* Get the delay iff the address of the store is the dependence. */
10142 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10143 return cost;
10145 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10146 return cost;
10147 return cost + 3;
10149 case TYPE_LOAD:
10150 case TYPE_SLOAD:
10151 case TYPE_FPLOAD:
10152 /* If a load, then the dependence must be on the memory address. If
10153 the addresses aren't equal, then it might be a false dependency. */
10154 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10156 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10157 || GET_CODE (SET_DEST (dep_pat)) != MEM
10158 || GET_CODE (SET_SRC (pat)) != MEM
10159 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10160 XEXP (SET_SRC (pat), 0)))
10161 return cost + 2;
10163 return cost + 8;
10165 break;
10167 case TYPE_BRANCH:
10168 /* Compare to branch latency is 0. There is no benefit from
10169 separating compare and branch. */
10170 if (dep_type == TYPE_COMPARE)
10171 return 0;
10172 /* Floating point compare to branch latency is less than
10173 compare to conditional move. */
10174 if (dep_type == TYPE_FPCMP)
10175 return cost - 1;
10176 break;
10177 default:
10178 break;
10180 break;
10182 case REG_DEP_ANTI:
10183 /* Anti-dependencies only penalize the fpu unit. */
10184 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10185 return 0;
10186 break;
10188 default:
10189 break;
10192 return cost;
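/* Illustrative example of the load/store disambiguation above:

     mem[r1] = r2        <- DEP_INSN (store)
     r3 = mem[r1]        <- INSN (load from the same address)

   costs cost + 8, whereas a load whose address provably differs from
   (or cannot be matched against) the store address only pays cost + 2
   for the possible false dependency.  */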
10195 static int
10196 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10197 unsigned int)
10199 switch (sparc_cpu)
10201 case PROCESSOR_SUPERSPARC:
10202 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10203 break;
10204 case PROCESSOR_HYPERSPARC:
10205 case PROCESSOR_SPARCLITE86X:
10206 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10207 break;
10208 default:
10209 break;
10211 return cost;
10214 static void
10215 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10216 int sched_verbose ATTRIBUTE_UNUSED,
10217 int max_ready ATTRIBUTE_UNUSED)
10220 static int
10221 sparc_use_sched_lookahead (void)
10223 switch (sparc_cpu)
10225 case PROCESSOR_ULTRASPARC:
10226 case PROCESSOR_ULTRASPARC3:
10227 return 4;
10228 case PROCESSOR_SUPERSPARC:
10229 case PROCESSOR_HYPERSPARC:
10230 case PROCESSOR_SPARCLITE86X:
10231 return 3;
10232 case PROCESSOR_NIAGARA4:
10233 case PROCESSOR_NIAGARA7:
10234 case PROCESSOR_M8:
10235 return 2;
10236 case PROCESSOR_NIAGARA:
10237 case PROCESSOR_NIAGARA2:
10238 case PROCESSOR_NIAGARA3:
10239 default:
10240 return 0;
10244 static int
10245 sparc_issue_rate (void)
10247 switch (sparc_cpu)
10249 case PROCESSOR_ULTRASPARC:
10250 case PROCESSOR_ULTRASPARC3:
10251 case PROCESSOR_M8:
10252 return 4;
10253 case PROCESSOR_SUPERSPARC:
10254 return 3;
10255 case PROCESSOR_HYPERSPARC:
10256 case PROCESSOR_SPARCLITE86X:
10257 case PROCESSOR_V9:
10258 /* Assume V9 processors are capable of at least dual-issue. */
10259 case PROCESSOR_NIAGARA4:
10260 case PROCESSOR_NIAGARA7:
10261 return 2;
10262 case PROCESSOR_NIAGARA:
10263 case PROCESSOR_NIAGARA2:
10264 case PROCESSOR_NIAGARA3:
10265 default:
10266 return 1;
10270 int
10271 sparc_branch_cost (bool speed_p, bool predictable_p)
10273 if (!speed_p)
10274 return 2;
10276 /* For pre-V9 processors we use a single value (usually 3) to take into
10277 account the potential annulling of the delay slot (which ends up being
10278 a bubble in the pipeline slot) plus a cycle to take into consideration
10279 the instruction cache effects.
10281 On V9 and later processors, which have branch prediction facilities,
10282 we take into account whether the branch is (easily) predictable. */
10283 const int cost = sparc_costs->branch_cost;
10285 switch (sparc_cpu)
10287 case PROCESSOR_V9:
10288 case PROCESSOR_ULTRASPARC:
10289 case PROCESSOR_ULTRASPARC3:
10290 case PROCESSOR_NIAGARA:
10291 case PROCESSOR_NIAGARA2:
10292 case PROCESSOR_NIAGARA3:
10293 case PROCESSOR_NIAGARA4:
10294 case PROCESSOR_NIAGARA7:
10295 case PROCESSOR_M8:
10296 return cost + (predictable_p ? 0 : 2);
10298 default:
10299 return cost;
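/* Worked example, assuming a hypothetical sparc_costs->branch_cost of 3:
   on UltraSPARC a predictable branch costs 3 and an unpredictable one
   3 + 2 = 5, while compiling for size (!speed_p) pins the cost at 2 on
   every processor.  */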
10303 static int
10304 set_extends (rtx_insn *insn)
10306 rtx pat = PATTERN (insn);
10308 switch (GET_CODE (SET_SRC (pat)))
10310 /* Load and some shift instructions zero extend. */
10311 case MEM:
10312 case ZERO_EXTEND:
10313 /* sethi clears the high bits. */
10314 case HIGH:
10315 /* LO_SUM is used with sethi; sethi cleared the high bits
10316 and the values used with lo_sum are positive. */
10317 case LO_SUM:
10318 /* A store-flag instruction stores 0 or 1. */
10319 case LT: case LTU:
10320 case GT: case GTU:
10321 case LE: case LEU:
10322 case GE: case GEU:
10323 case EQ:
10324 case NE:
10325 return 1;
10326 case AND:
10328 rtx op0 = XEXP (SET_SRC (pat), 0);
10329 rtx op1 = XEXP (SET_SRC (pat), 1);
10330 if (GET_CODE (op1) == CONST_INT)
10331 return INTVAL (op1) >= 0;
10332 if (GET_CODE (op0) != REG)
10333 return 0;
10334 if (sparc_check_64 (op0, insn) == 1)
10335 return 1;
10336 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10338 case IOR:
10339 case XOR:
10341 rtx op0 = XEXP (SET_SRC (pat), 0);
10342 rtx op1 = XEXP (SET_SRC (pat), 1);
10343 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10344 return 0;
10345 if (GET_CODE (op1) == CONST_INT)
10346 return INTVAL (op1) >= 0;
10347 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10349 case LSHIFTRT:
10350 return GET_MODE (SET_SRC (pat)) == SImode;
10351 /* Positive integers leave the high bits zero. */
10352 case CONST_INT:
10353 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10354 case ASHIFTRT:
10355 case SIGN_EXTEND:
10356 return - (GET_MODE (SET_SRC (pat)) == SImode);
10357 case REG:
10358 return sparc_check_64 (SET_SRC (pat), insn);
10359 default:
10360 return 0;
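/* For instance, under the rules above:

     r1 = mem[...]                       ->  1 (loads zero-extend)
     r1 = r2 & 0x7f                      ->  1 (non-negative mask)
     r1 = r2 >> 3  (SImode arithmetic)   -> -1 (sign-extended)
     r1 = r2 + r3                        ->  0 (unknown)

   matching the 1 / -1 / 0 convention consumed by sparc_check_64.  */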
10364 /* We _ought_ to have only one kind per function, but... */
10365 static GTY(()) rtx sparc_addr_diff_list;
10366 static GTY(()) rtx sparc_addr_list;
10368 void
10369 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10371 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10372 if (diff)
10373 sparc_addr_diff_list
10374 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10375 else
10376 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10379 static void
10380 sparc_output_addr_vec (rtx vec)
10382 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10383 int idx, vlen = XVECLEN (body, 0);
10385 #ifdef ASM_OUTPUT_ADDR_VEC_START
10386 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10387 #endif
10389 #ifdef ASM_OUTPUT_CASE_LABEL
10390 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10391 NEXT_INSN (lab));
10392 #else
10393 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10394 #endif
10396 for (idx = 0; idx < vlen; idx++)
10398 ASM_OUTPUT_ADDR_VEC_ELT
10399 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10402 #ifdef ASM_OUTPUT_ADDR_VEC_END
10403 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10404 #endif
10407 static void
10408 sparc_output_addr_diff_vec (rtx vec)
10410 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10411 rtx base = XEXP (XEXP (body, 0), 0);
10412 int idx, vlen = XVECLEN (body, 1);
10414 #ifdef ASM_OUTPUT_ADDR_VEC_START
10415 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10416 #endif
10418 #ifdef ASM_OUTPUT_CASE_LABEL
10419 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10420 NEXT_INSN (lab));
10421 #else
10422 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10423 #endif
10425 for (idx = 0; idx < vlen; idx++)
10427 ASM_OUTPUT_ADDR_DIFF_ELT
10428 (asm_out_file,
10429 body,
10430 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10431 CODE_LABEL_NUMBER (base));
10434 #ifdef ASM_OUTPUT_ADDR_VEC_END
10435 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10436 #endif
10439 static void
10440 sparc_output_deferred_case_vectors (void)
10442 rtx t;
10443 int align;
10445 if (sparc_addr_list == NULL_RTX
10446 && sparc_addr_diff_list == NULL_RTX)
10447 return;
10449 /* Align to cache line in the function's code section. */
10450 switch_to_section (current_function_section ());
10452 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10453 if (align > 0)
10454 ASM_OUTPUT_ALIGN (asm_out_file, align);
10456 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10457 sparc_output_addr_vec (XEXP (t, 0));
10458 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10459 sparc_output_addr_diff_vec (XEXP (t, 0));
10461 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10464 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10465 unknown. Return 1 if the high bits are zero, -1 if the register is
10466 sign extended. */
10467 int
10468 sparc_check_64 (rtx x, rtx_insn *insn)
10470 /* If a register is set only once it is safe to ignore insns this
10471 code does not know how to handle. The loop will either recognize
10472 the single set and return the correct value or fail to recognize
10473 it and return 0. */
10474 int set_once = 0;
10475 rtx y = x;
10477 gcc_assert (GET_CODE (x) == REG);
10479 if (GET_MODE (x) == DImode)
10480 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10482 if (flag_expensive_optimizations
10483 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10484 set_once = 1;
10486 if (insn == 0)
10488 if (set_once)
10489 insn = get_last_insn_anywhere ();
10490 else
10491 return 0;
10494 while ((insn = PREV_INSN (insn)))
10496 switch (GET_CODE (insn))
10498 case JUMP_INSN:
10499 case NOTE:
10500 break;
10501 case CODE_LABEL:
10502 case CALL_INSN:
10503 default:
10504 if (! set_once)
10505 return 0;
10506 break;
10507 case INSN:
10509 rtx pat = PATTERN (insn);
10510 if (GET_CODE (pat) != SET)
10511 return 0;
10512 if (rtx_equal_p (x, SET_DEST (pat)))
10513 return set_extends (insn);
10514 if (y && rtx_equal_p (y, SET_DEST (pat)))
10515 return set_extends (insn);
10516 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10517 return 0;
10521 return 0;
10524 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10525 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10527 const char *
10528 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10530 static char asm_code[60];
10532 /* The scratch register is only required when the destination
10533 register is not a 64-bit global or out register. */
10534 if (which_alternative != 2)
10535 operands[3] = operands[0];
10537 /* We can only shift by constants <= 63. */
10538 if (GET_CODE (operands[2]) == CONST_INT)
10539 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10541 if (GET_CODE (operands[1]) == CONST_INT)
10543 output_asm_insn ("mov\t%1, %3", operands);
10545 else
10547 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10548 if (sparc_check_64 (operands[1], insn) <= 0)
10549 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10550 output_asm_insn ("or\t%L1, %3, %3", operands);
10553 strcpy (asm_code, opcode);
10555 if (which_alternative != 2)
10556 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10557 else
10558 return
10559 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
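/* With the scratch register folded onto the destination
   (which_alternative != 2) and an input whose high word is not known
   to be clean, the code above emits a sequence of the shape:

     sllx  %H1, 32, %0      ! high half into bits 63..32
     srl   %L1, 0, %L1      ! zero-extend the low half
     or    %L1, %0, %0      ! assemble the 64-bit value
     sllx  %0, %2, %L0      ! the shift proper (OPCODE, here sllx)
     srlx  %L0, 32, %H0     ! split the result back into the pair
*/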
10562 /* Output rtl to increment the profiler label LABELNO
10563 for profiling a function entry. */
10565 void
10566 sparc_profile_hook (int labelno)
10568 char buf[32];
10569 rtx lab, fun;
10571 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10572 if (NO_PROFILE_COUNTERS)
10574 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10576 else
10578 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10579 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10580 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10584 #ifdef TARGET_SOLARIS
10585 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10587 static void
10588 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10589 tree decl ATTRIBUTE_UNUSED)
10591 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10593 solaris_elf_asm_comdat_section (name, flags, decl);
10594 return;
10597 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10599 if (!(flags & SECTION_DEBUG))
10600 fputs (",#alloc", asm_out_file);
10601 #if HAVE_GAS_SECTION_EXCLUDE
10602 if (flags & SECTION_EXCLUDE)
10603 fputs (",#exclude", asm_out_file);
10604 #endif
10605 if (flags & SECTION_WRITE)
10606 fputs (",#write", asm_out_file);
10607 if (flags & SECTION_TLS)
10608 fputs (",#tls", asm_out_file);
10609 if (flags & SECTION_CODE)
10610 fputs (",#execinstr", asm_out_file);
10612 if (flags & SECTION_NOTYPE)
10613 ;
10614 else if (flags & SECTION_BSS)
10615 fputs (",#nobits", asm_out_file);
10616 else
10617 fputs (",#progbits", asm_out_file);
10619 fputc ('\n', asm_out_file);
10621 #endif /* TARGET_SOLARIS */
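/* For instance, a writable TLS data section named ".tdata" comes out
   with the Solaris #-attribute syntax emitted above as:

     .section  ".tdata",#alloc,#write,#tls,#progbits
*/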
10623 /* We do not allow indirect calls to be optimized into sibling calls.
10625 We cannot use sibling calls when delayed branches are disabled
10626 because they will likely require the call delay slot to be filled.
10628 Also, on SPARC 32-bit we cannot emit a sibling call when the
10629 current function returns a structure. This is because the "unimp
10630 after call" convention would cause the callee to return to the
10631 wrong place. The generic code already disallows cases where the
10632 function being called returns a structure.
10634 It may seem strange how this last case could occur. Usually there
10635 is code after the call which jumps to epilogue code which dumps the
10636 return value into the struct return area. That ought to invalidate
10637 the sibling call, right? Well, in the C++ case we can end up passing
10638 the pointer to the struct return area to a constructor (which returns
10639 void) and then nothing else happens. Such a sibling call would look
10640 valid without the added check here.
10642 VxWorks PIC PLT entries require the global pointer to be initialized
10643 on entry. We therefore can't emit sibling calls to them. */
10644 static bool
10645 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10647 return (decl
10648 && flag_delayed_branch
10649 && (TARGET_ARCH64 || ! cfun->returns_struct)
10650 && !(TARGET_VXWORKS_RTP
10651 && flag_pic
10652 && !targetm.binds_local_p (decl)));
10655 /* libfunc renaming. */
10657 static void
10658 sparc_init_libfuncs (void)
10660 if (TARGET_ARCH32)
10662 /* Use the subroutines that Sun's library provides for integer
10663 multiply and divide. The `*' prevents an underscore from
10664 being prepended by the compiler. .umul is a little faster
10665 than .mul. */
10666 set_optab_libfunc (smul_optab, SImode, "*.umul");
10667 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10668 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10669 set_optab_libfunc (smod_optab, SImode, "*.rem");
10670 set_optab_libfunc (umod_optab, SImode, "*.urem");
10672 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10673 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10674 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10675 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10676 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10677 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10679 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10680 is because with soft-float, the SFmode and DFmode sqrt
10681 instructions will be absent, and the compiler will notice and
10682 try to use the TFmode sqrt instruction for calls to the
10683 builtin function sqrt, but this fails. */
10684 if (TARGET_FPU)
10685 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10687 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10688 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10689 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10690 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10691 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10692 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10694 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10695 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10696 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10697 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10699 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10700 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10701 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10702 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10704 if (DITF_CONVERSION_LIBFUNCS)
10706 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10707 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10708 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10709 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10712 if (SUN_CONVERSION_LIBFUNCS)
10714 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10715 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10716 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10717 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10720 if (TARGET_ARCH64)
10722 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10723 do not exist in the library. Make sure the compiler does not
10724 emit calls to them by accident. (It should always use the
10725 hardware instructions.) */
10726 set_optab_libfunc (smul_optab, SImode, 0);
10727 set_optab_libfunc (sdiv_optab, SImode, 0);
10728 set_optab_libfunc (udiv_optab, SImode, 0);
10729 set_optab_libfunc (smod_optab, SImode, 0);
10730 set_optab_libfunc (umod_optab, SImode, 0);
10732 if (SUN_INTEGER_MULTIPLY_64)
10734 set_optab_libfunc (smul_optab, DImode, "__mul64");
10735 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10736 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10737 set_optab_libfunc (smod_optab, DImode, "__rem64");
10738 set_optab_libfunc (umod_optab, DImode, "__urem64");
10741 if (SUN_CONVERSION_LIBFUNCS)
10743 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10744 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10745 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10746 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
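/* As a consequence of the renaming above, a 32-bit compilation of

     long double f (long double x, long double y) { return x + y; }

   should call _Q_add instead of the default __addtf3, and a plain
   SImode signed division should call the Sun routine .div (the '*'
   suppresses the usual underscore prefix) rather than a libgcc
   function.  */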
10751 /* SPARC builtins. */
10752 enum sparc_builtins
10754 /* FPU builtins. */
10755 SPARC_BUILTIN_LDFSR,
10756 SPARC_BUILTIN_STFSR,
10758 /* VIS 1.0 builtins. */
10759 SPARC_BUILTIN_FPACK16,
10760 SPARC_BUILTIN_FPACK32,
10761 SPARC_BUILTIN_FPACKFIX,
10762 SPARC_BUILTIN_FEXPAND,
10763 SPARC_BUILTIN_FPMERGE,
10764 SPARC_BUILTIN_FMUL8X16,
10765 SPARC_BUILTIN_FMUL8X16AU,
10766 SPARC_BUILTIN_FMUL8X16AL,
10767 SPARC_BUILTIN_FMUL8SUX16,
10768 SPARC_BUILTIN_FMUL8ULX16,
10769 SPARC_BUILTIN_FMULD8SUX16,
10770 SPARC_BUILTIN_FMULD8ULX16,
10771 SPARC_BUILTIN_FALIGNDATAV4HI,
10772 SPARC_BUILTIN_FALIGNDATAV8QI,
10773 SPARC_BUILTIN_FALIGNDATAV2SI,
10774 SPARC_BUILTIN_FALIGNDATADI,
10775 SPARC_BUILTIN_WRGSR,
10776 SPARC_BUILTIN_RDGSR,
10777 SPARC_BUILTIN_ALIGNADDR,
10778 SPARC_BUILTIN_ALIGNADDRL,
10779 SPARC_BUILTIN_PDIST,
10780 SPARC_BUILTIN_EDGE8,
10781 SPARC_BUILTIN_EDGE8L,
10782 SPARC_BUILTIN_EDGE16,
10783 SPARC_BUILTIN_EDGE16L,
10784 SPARC_BUILTIN_EDGE32,
10785 SPARC_BUILTIN_EDGE32L,
10786 SPARC_BUILTIN_FCMPLE16,
10787 SPARC_BUILTIN_FCMPLE32,
10788 SPARC_BUILTIN_FCMPNE16,
10789 SPARC_BUILTIN_FCMPNE32,
10790 SPARC_BUILTIN_FCMPGT16,
10791 SPARC_BUILTIN_FCMPGT32,
10792 SPARC_BUILTIN_FCMPEQ16,
10793 SPARC_BUILTIN_FCMPEQ32,
10794 SPARC_BUILTIN_FPADD16,
10795 SPARC_BUILTIN_FPADD16S,
10796 SPARC_BUILTIN_FPADD32,
10797 SPARC_BUILTIN_FPADD32S,
10798 SPARC_BUILTIN_FPSUB16,
10799 SPARC_BUILTIN_FPSUB16S,
10800 SPARC_BUILTIN_FPSUB32,
10801 SPARC_BUILTIN_FPSUB32S,
10802 SPARC_BUILTIN_ARRAY8,
10803 SPARC_BUILTIN_ARRAY16,
10804 SPARC_BUILTIN_ARRAY32,
10806 /* VIS 2.0 builtins. */
10807 SPARC_BUILTIN_EDGE8N,
10808 SPARC_BUILTIN_EDGE8LN,
10809 SPARC_BUILTIN_EDGE16N,
10810 SPARC_BUILTIN_EDGE16LN,
10811 SPARC_BUILTIN_EDGE32N,
10812 SPARC_BUILTIN_EDGE32LN,
10813 SPARC_BUILTIN_BMASK,
10814 SPARC_BUILTIN_BSHUFFLEV4HI,
10815 SPARC_BUILTIN_BSHUFFLEV8QI,
10816 SPARC_BUILTIN_BSHUFFLEV2SI,
10817 SPARC_BUILTIN_BSHUFFLEDI,
10819 /* VIS 3.0 builtins. */
10820 SPARC_BUILTIN_CMASK8,
10821 SPARC_BUILTIN_CMASK16,
10822 SPARC_BUILTIN_CMASK32,
10823 SPARC_BUILTIN_FCHKSM16,
10824 SPARC_BUILTIN_FSLL16,
10825 SPARC_BUILTIN_FSLAS16,
10826 SPARC_BUILTIN_FSRL16,
10827 SPARC_BUILTIN_FSRA16,
10828 SPARC_BUILTIN_FSLL32,
10829 SPARC_BUILTIN_FSLAS32,
10830 SPARC_BUILTIN_FSRL32,
10831 SPARC_BUILTIN_FSRA32,
10832 SPARC_BUILTIN_PDISTN,
10833 SPARC_BUILTIN_FMEAN16,
10834 SPARC_BUILTIN_FPADD64,
10835 SPARC_BUILTIN_FPSUB64,
10836 SPARC_BUILTIN_FPADDS16,
10837 SPARC_BUILTIN_FPADDS16S,
10838 SPARC_BUILTIN_FPSUBS16,
10839 SPARC_BUILTIN_FPSUBS16S,
10840 SPARC_BUILTIN_FPADDS32,
10841 SPARC_BUILTIN_FPADDS32S,
10842 SPARC_BUILTIN_FPSUBS32,
10843 SPARC_BUILTIN_FPSUBS32S,
10844 SPARC_BUILTIN_FUCMPLE8,
10845 SPARC_BUILTIN_FUCMPNE8,
10846 SPARC_BUILTIN_FUCMPGT8,
10847 SPARC_BUILTIN_FUCMPEQ8,
10848 SPARC_BUILTIN_FHADDS,
10849 SPARC_BUILTIN_FHADDD,
10850 SPARC_BUILTIN_FHSUBS,
10851 SPARC_BUILTIN_FHSUBD,
10852 SPARC_BUILTIN_FNHADDS,
10853 SPARC_BUILTIN_FNHADDD,
10854 SPARC_BUILTIN_UMULXHI,
10855 SPARC_BUILTIN_XMULX,
10856 SPARC_BUILTIN_XMULXHI,
10858 /* VIS 4.0 builtins. */
10859 SPARC_BUILTIN_FPADD8,
10860 SPARC_BUILTIN_FPADDS8,
10861 SPARC_BUILTIN_FPADDUS8,
10862 SPARC_BUILTIN_FPADDUS16,
10863 SPARC_BUILTIN_FPCMPLE8,
10864 SPARC_BUILTIN_FPCMPGT8,
10865 SPARC_BUILTIN_FPCMPULE16,
10866 SPARC_BUILTIN_FPCMPUGT16,
10867 SPARC_BUILTIN_FPCMPULE32,
10868 SPARC_BUILTIN_FPCMPUGT32,
10869 SPARC_BUILTIN_FPMAX8,
10870 SPARC_BUILTIN_FPMAX16,
10871 SPARC_BUILTIN_FPMAX32,
10872 SPARC_BUILTIN_FPMAXU8,
10873 SPARC_BUILTIN_FPMAXU16,
10874 SPARC_BUILTIN_FPMAXU32,
10875 SPARC_BUILTIN_FPMIN8,
10876 SPARC_BUILTIN_FPMIN16,
10877 SPARC_BUILTIN_FPMIN32,
10878 SPARC_BUILTIN_FPMINU8,
10879 SPARC_BUILTIN_FPMINU16,
10880 SPARC_BUILTIN_FPMINU32,
10881 SPARC_BUILTIN_FPSUB8,
10882 SPARC_BUILTIN_FPSUBS8,
10883 SPARC_BUILTIN_FPSUBUS8,
10884 SPARC_BUILTIN_FPSUBUS16,
10886 /* VIS 4.0B builtins. */
10888 /* Note that all the DICTUNPACK* entries should be kept
10889 contiguous. */
10890 SPARC_BUILTIN_FIRST_DICTUNPACK,
10891 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10892 SPARC_BUILTIN_DICTUNPACK16,
10893 SPARC_BUILTIN_DICTUNPACK32,
10894 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10896 /* Note that all the FPCMP*SHL entries should be kept
10897 contiguous. */
10898 SPARC_BUILTIN_FIRST_FPCMPSHL,
10899 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10900 SPARC_BUILTIN_FPCMPGT8SHL,
10901 SPARC_BUILTIN_FPCMPEQ8SHL,
10902 SPARC_BUILTIN_FPCMPNE8SHL,
10903 SPARC_BUILTIN_FPCMPLE16SHL,
10904 SPARC_BUILTIN_FPCMPGT16SHL,
10905 SPARC_BUILTIN_FPCMPEQ16SHL,
10906 SPARC_BUILTIN_FPCMPNE16SHL,
10907 SPARC_BUILTIN_FPCMPLE32SHL,
10908 SPARC_BUILTIN_FPCMPGT32SHL,
10909 SPARC_BUILTIN_FPCMPEQ32SHL,
10910 SPARC_BUILTIN_FPCMPNE32SHL,
10911 SPARC_BUILTIN_FPCMPULE8SHL,
10912 SPARC_BUILTIN_FPCMPUGT8SHL,
10913 SPARC_BUILTIN_FPCMPULE16SHL,
10914 SPARC_BUILTIN_FPCMPUGT16SHL,
10915 SPARC_BUILTIN_FPCMPULE32SHL,
10916 SPARC_BUILTIN_FPCMPUGT32SHL,
10917 SPARC_BUILTIN_FPCMPDE8SHL,
10918 SPARC_BUILTIN_FPCMPDE16SHL,
10919 SPARC_BUILTIN_FPCMPDE32SHL,
10920 SPARC_BUILTIN_FPCMPUR8SHL,
10921 SPARC_BUILTIN_FPCMPUR16SHL,
10922 SPARC_BUILTIN_FPCMPUR32SHL,
10923 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
10925 SPARC_BUILTIN_MAX
10928 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
10929 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
10931 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
10932 The instruction should require a constant operand of some sort. The
10933 function prints an error if OPVAL is not valid. */
10935 static int
10936 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
10938 if (GET_CODE (opval) != CONST_INT)
10940 error ("%qs expects a constant argument", insn_data[icode].name);
10941 return false;
10944 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
10946 error ("constant argument out of range for %qs", insn_data[icode].name);
10947 return false;
10949 return true;
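/* So, for example, calling one of the builtins whose last operand must
   be a literal (presumably the VIS4B dictunpack and fpcmp*shl builtins
   defined below) with a variable, as in

     __builtin_vis_dictunpack8 (d, n);   where n is not a constant,

   is diagnosed with "expects a constant argument" instead of being
   silently accepted.  */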
10952 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
10953 function decl or NULL_TREE if the builtin was not added. */
10955 static tree
10956 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
10957 tree type)
10959 tree t
10960 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
10962 if (t)
10964 sparc_builtins[code] = t;
10965 sparc_builtins_icode[code] = icode;
10968 return t;
10971 /* Likewise, but also marks the function as "const". */
10973 static tree
10974 def_builtin_const (const char *name, enum insn_code icode,
10975 enum sparc_builtins code, tree type)
10977 tree t = def_builtin (name, icode, code, type);
10979 if (t)
10980 TREE_READONLY (t) = 1;
10982 return t;
10985 /* Implement the TARGET_INIT_BUILTINS target hook.
10986 Create builtin functions for special SPARC instructions. */
10988 static void
10989 sparc_init_builtins (void)
10991 if (TARGET_FPU)
10992 sparc_fpu_init_builtins ();
10994 if (TARGET_VIS)
10995 sparc_vis_init_builtins ();
10998 /* Create builtin functions for FPU instructions. */
11000 static void
11001 sparc_fpu_init_builtins (void)
11003 tree ftype
11004 = build_function_type_list (void_type_node,
11005 build_pointer_type (unsigned_type_node), 0);
11006 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11007 SPARC_BUILTIN_LDFSR, ftype);
11008 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11009 SPARC_BUILTIN_STFSR, ftype);
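/* Typical use of the two builtins registered above (a sketch; the
   FSR field chosen is just an example):

     unsigned int fsr;
     __builtin_store_fsr (&fsr);    read %fsr into memory
     fsr |= 0x0f800000;             e.g. set the trap-enable bits
     __builtin_load_fsr (&fsr);     write it back to %fsr

   Both take a pointer to unsigned int, per FTYPE above.  */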
11012 /* Create builtin functions for VIS instructions. */
11014 static void
11015 sparc_vis_init_builtins (void)
11017 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11018 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11019 tree v4hi = build_vector_type (intHI_type_node, 4);
11020 tree v2hi = build_vector_type (intHI_type_node, 2);
11021 tree v2si = build_vector_type (intSI_type_node, 2);
11022 tree v1si = build_vector_type (intSI_type_node, 1);
11024 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11025 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11026 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11027 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11028 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11029 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11030 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11031 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11032 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11033 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11034 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11035 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11036 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11037 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11038 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11039 v8qi, v8qi,
11040 intDI_type_node, 0);
11041 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11042 v8qi, v8qi, 0);
11043 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11044 v8qi, v8qi, 0);
11045 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11046 intSI_type_node, 0);
11047 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11048 intSI_type_node, 0);
11049 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
11050 intDI_type_node, 0);
11051 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11052 intDI_type_node,
11053 intDI_type_node, 0);
11054 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11055 intSI_type_node,
11056 intSI_type_node, 0);
11057 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11058 ptr_type_node,
11059 intSI_type_node, 0);
11060 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11061 ptr_type_node,
11062 intDI_type_node, 0);
11063 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11064 ptr_type_node,
11065 ptr_type_node, 0);
11066 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11067 ptr_type_node,
11068 ptr_type_node, 0);
11069 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11070 v4hi, v4hi, 0);
11071 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11072 v2si, v2si, 0);
11073 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11074 v4hi, v4hi, 0);
11075 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11076 v2si, v2si, 0);
11077 tree void_ftype_di = build_function_type_list (void_type_node,
11078 intDI_type_node, 0);
11079 tree di_ftype_void = build_function_type_list (intDI_type_node,
11080 void_type_node, 0);
11081 tree void_ftype_si = build_function_type_list (void_type_node,
11082 intSI_type_node, 0);
11083 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11084 float_type_node,
11085 float_type_node, 0);
11086 tree df_ftype_df_df = build_function_type_list (double_type_node,
11087 double_type_node,
11088 double_type_node, 0);
11090 /* Packing and expanding vectors. */
11091 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11092 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11093 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11094 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11095 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11096 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11097 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11098 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11099 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11100 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11102 /* Multiplications. */
11103 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11104 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11105 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11106 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11107 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11108 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11109 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11110 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11111 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11112 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11113 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11114 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11115 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11116 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11118 /* Data aligning. */
11119 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11120 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11121 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11122 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11123 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11124 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11125 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11126 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11128 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11129 SPARC_BUILTIN_WRGSR, void_ftype_di);
11130 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11131 SPARC_BUILTIN_RDGSR, di_ftype_void);
11133 if (TARGET_ARCH64)
11135 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11136 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11137 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11138 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11140 else
11142 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11143 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11144 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11145 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
11148 /* Pixel distance. */
11149 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11150 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
11152 /* Edge handling. */
11153 if (TARGET_ARCH64)
11155 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11156 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11157 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11158 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11159 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11160 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11161 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11162 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11163 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11164 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11165 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11166 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11168 else
11170 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11171 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11172 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11173 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11174 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11175 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11176 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11177 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11178 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11179 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11180 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11181 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11184 /* Pixel compare. */
11185 if (TARGET_ARCH64)
11187 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11188 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11189 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11190 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11191 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11192 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11193 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11194 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11195 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11196 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11197 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11198 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11199 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11200 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11201 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11202 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11204 else
11206 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11207 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11208 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11209 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11210 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11211 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11212 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11213 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11214 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11215 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11216 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11217 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11218 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11219 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11220 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11221 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11224 /* Addition and subtraction. */
11225 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11226 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11227 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11228 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11229 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11230 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11231 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11232 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11233 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11234 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11235 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11236 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11237 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11238 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11239 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11240 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11242 /* Three-dimensional array addressing. */
11243 if (TARGET_ARCH64)
11245 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11246 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11247 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11248 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11249 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11250 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11252 else
11254 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11255 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11256 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11257 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11258 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11259 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
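/* Sketch of the classic misaligned-load idiom the VIS1 builtins above
   enable (assumes -mvis; the function name and types are illustrative):

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     load_misaligned (const v8qi *p)
     {
       const v8qi *q = __builtin_vis_alignaddr ((void *) p, 0);
       return __builtin_vis_faligndatav8qi (q[0], q[1]);
     }
*/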
11262 if (TARGET_VIS2)
11264 /* Edge handling. */
11265 if (TARGET_ARCH64)
11267 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11268 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11269 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11270 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11271 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11272 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11273 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11274 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11275 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11276 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11277 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11278 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11280 else
11282 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11283 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11284 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11285 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11286 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11287 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11288 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11289 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11290 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11291 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11292 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11293 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11296 /* Byte mask and shuffle. */
11297 if (TARGET_ARCH64)
11298 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11299 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11300 else
11301 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11302 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11303 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11304 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11305 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11306 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11307 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11308 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11309 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11310 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11313 if (TARGET_VIS3)
11315 if (TARGET_ARCH64)
11317 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11318 SPARC_BUILTIN_CMASK8, void_ftype_di);
11319 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11320 SPARC_BUILTIN_CMASK16, void_ftype_di);
11321 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11322 SPARC_BUILTIN_CMASK32, void_ftype_di);
11324 else
11326 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11327 SPARC_BUILTIN_CMASK8, void_ftype_si);
11328 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11329 SPARC_BUILTIN_CMASK16, void_ftype_si);
11330 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11331 SPARC_BUILTIN_CMASK32, void_ftype_si);
11334 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11335 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11337 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11338 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11339 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11340 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11341 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11342 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11343 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11344 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11345 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11346 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11347 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11348 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11349 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11350 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11351 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11352 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11354 if (TARGET_ARCH64)
11355 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11356 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11357 else
11358 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11359 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11361 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11362 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11363 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11364 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11365 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11366 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11368 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11369 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11370 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11371 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11372 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11373 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11374 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11375 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11376 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11377 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11378 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11379 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11380 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11381 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11382 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11383 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11385 if (TARGET_ARCH64)
11387 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11388 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11389 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11390 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11391 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11392 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11393 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11394 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11396 else
11398 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11399 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11400 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11401 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11402 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11403 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11404 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11405 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11408 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11409 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11410 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11411 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11412 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11413 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11414 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11415 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11416 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11417 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11418 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11419 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11421 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11422 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11423 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11424 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11425 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11426 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11429 if (TARGET_VIS4)
11431 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11432 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11433 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11434 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11435 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11436 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11437 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11438 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11441 if (TARGET_ARCH64)
11443 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11444 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11445 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11446 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11447 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11448 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11449 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11450 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11451 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11452 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11453 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11454 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11456 else
11458 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11459 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11460 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11461 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11462 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11463 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11464 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11465 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11466 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11467 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11468 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11469 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11472 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11473 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11474 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11475 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11476 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11477 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11478 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11479 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11480 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11481 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11482 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11483 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11484 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11485 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11486 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11487 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11488 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11489 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11490 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11491 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11492 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11493 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11494 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11495 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11496 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11497 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11498 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11499 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11500 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11501 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11502 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11503 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11506 if (TARGET_VIS4B)
11508 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11509 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11510 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11511 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11512 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11513 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11515 if (TARGET_ARCH64)
11517 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11518 v8qi, v8qi,
11519 intSI_type_node, 0);
11520 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11521 v4hi, v4hi,
11522 intSI_type_node, 0);
11523 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11524 v2si, v2si,
11525 intSI_type_node, 0);
11527 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11528 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11529 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11530 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11531 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11532 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11533 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11534 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11536 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11537 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11538 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11539 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11540 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11541 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11542 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11543 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11545 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11546 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11547 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11548 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11549 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11550 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11551 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11552 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11555 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11556 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11557 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11558 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11560 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11561 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11562 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11563 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11565 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11566 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11567 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11568 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11570 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11571 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11572 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11573 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11574 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11575 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11577 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11578 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11579 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11580 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11581 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11582 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11585 else
11587 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11588 v8qi, v8qi,
11589 intSI_type_node, 0);
11590 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11591 v4hi, v4hi,
11592 intSI_type_node, 0);
11593 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11594 v2si, v2si,
11595 intSI_type_node, 0);
11597 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11598 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11599 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11600 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11601 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11602 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11603 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11604 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11606 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11607 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11608 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11609 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11610 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11611 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11612 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11613 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11615 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11616 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11617 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11618 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11619 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11620 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11621 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11622 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11625 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11626 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11627 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11628 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11630 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11631 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11632 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11633 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11635 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11636 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11637 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11638 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11640 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11641 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11642 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11643 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11644 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11645 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11647 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11648 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11649 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11650 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11651 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11652 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
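Once registered, these builtins are directly callable from C when the matching ISA flag is enabled. A minimal usage sketch, assuming compilation with -mvis4 for a VIS4-capable target; the vector typedef is illustrative and mirrors the 8-byte v8qi layout used in the registrations above:

/* Hypothetical user-level code, not part of this file.  */
typedef unsigned char vec8 __attribute__ ((vector_size (8)));

vec8
saturating_sum (vec8 a, vec8 b)
{
  /* Expands through SPARC_BUILTIN_FPADDUS8 to the FPADDUS8 insn.  */
  return __builtin_vis_fpaddus8 (a, b);
}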
11657 /* Implement TARGET_BUILTIN_DECL hook. */
11659 static tree
11660 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11662 if (code >= SPARC_BUILTIN_MAX)
11663 return error_mark_node;
11665 return sparc_builtins[code];
11668 /* Implement TARGET_EXPAND_BUILTIN hook. */
11670 static rtx
11671 sparc_expand_builtin (tree exp, rtx target,
11672 rtx subtarget ATTRIBUTE_UNUSED,
11673 machine_mode tmode ATTRIBUTE_UNUSED,
11674 int ignore ATTRIBUTE_UNUSED)
11676 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11677 enum sparc_builtins code
11678 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11679 enum insn_code icode = sparc_builtins_icode[code];
11680 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11681 call_expr_arg_iterator iter;
11682 int arg_count = 0;
11683 rtx pat, op[4];
11684 tree arg;
11686 if (nonvoid)
11688 machine_mode tmode = insn_data[icode].operand[0].mode;
11689 if (!target
11690 || GET_MODE (target) != tmode
11691 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11692 op[0] = gen_reg_rtx (tmode);
11693 else
11694 op[0] = target;
11696 else
11697 op[0] = NULL_RTX;
11699 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11701 const struct insn_operand_data *insn_op;
11702 int idx;
11704 if (arg == error_mark_node)
11705 return NULL_RTX;
11707 arg_count++;
11708 idx = arg_count - !nonvoid;
11709 insn_op = &insn_data[icode].operand[idx];
11710 op[arg_count] = expand_normal (arg);
11712 /* Some of the builtins require constant arguments. We check
11713 for this here. */
11714 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11715 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11716 && arg_count == 3)
11717 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11718 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11719 && arg_count == 2))
11721 if (!check_constant_argument (icode, idx, op[arg_count]))
11722 return const0_rtx;
11725 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11727 if (!address_operand (op[arg_count], SImode))
11729 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11730 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11732 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11735 else if (insn_op->mode == V1DImode
11736 && GET_MODE (op[arg_count]) == DImode)
11737 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11739 else if (insn_op->mode == V1SImode
11740 && GET_MODE (op[arg_count]) == SImode)
11741 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11743 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11744 insn_op->mode))
11745 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11748 switch (arg_count)
11750 case 0:
11751 pat = GEN_FCN (icode) (op[0]);
11752 break;
11753 case 1:
11754 if (nonvoid)
11755 pat = GEN_FCN (icode) (op[0], op[1]);
11756 else
11757 pat = GEN_FCN (icode) (op[1]);
11758 break;
11759 case 2:
11760 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11761 break;
11762 case 3:
11763 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11764 break;
11765 default:
11766 gcc_unreachable ();
11769 if (!pat)
11770 return NULL_RTX;
11772 emit_insn (pat);
11774 return (nonvoid ? op[0] : const0_rtx);
11777 /* Return the upper 16 bits of the 8x16 multiplication. */
11779 static int
11780 sparc_vis_mul8x16 (int e8, int e16)
11782 return (e8 * e16 + 128) / 256;
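The +128 bias makes the implicit division by 256 round to nearest instead of truncating. A hypothetical self-check (illustration only, not compiler code):

/* 2 * 200 = 400 and 400/256 = 1.5625, so rounding gives 2 where
   truncation would give 1; 2 * 100 = 200 likewise rounds up to 1.  */
static void
check_mul8x16_rounding (void)
{
  gcc_assert (sparc_vis_mul8x16 (2, 200) == 2);  /* (400 + 128) / 256 */
  gcc_assert (sparc_vis_mul8x16 (2, 100) == 1);  /* (200 + 128) / 256 */
}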
11785 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11786 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11788 static void
11789 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11790 tree inner_type, tree cst0, tree cst1)
11792 unsigned i, num = VECTOR_CST_NELTS (cst0);
11793 int scale;
11795 switch (fncode)
11797 case SPARC_BUILTIN_FMUL8X16:
11798 for (i = 0; i < num; ++i)
11800 int val
11801 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11802 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11803 n_elts->quick_push (build_int_cst (inner_type, val));
11805 break;
11807 case SPARC_BUILTIN_FMUL8X16AU:
11808 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11810 for (i = 0; i < num; ++i)
11812 int val
11813 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11814 scale);
11815 n_elts->quick_push (build_int_cst (inner_type, val));
11817 break;
11819 case SPARC_BUILTIN_FMUL8X16AL:
11820 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11822 for (i = 0; i < num; ++i)
11824 int val
11825 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11826 scale);
11827 n_elts->quick_push (build_int_cst (inner_type, val));
11829 break;
11831 default:
11832 gcc_unreachable ();
11836 /* Implement TARGET_FOLD_BUILTIN hook.
11838 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11839 result of the function call is ignored. NULL_TREE is returned if the
11840 function could not be folded. */
11842 static tree
11843 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11844 tree *args, bool ignore)
11846 enum sparc_builtins code
11847 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11848 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11849 tree arg0, arg1, arg2;
11851 if (ignore)
11852 switch (code)
11854 case SPARC_BUILTIN_LDFSR:
11855 case SPARC_BUILTIN_STFSR:
11856 case SPARC_BUILTIN_ALIGNADDR:
11857 case SPARC_BUILTIN_WRGSR:
11858 case SPARC_BUILTIN_BMASK:
11859 case SPARC_BUILTIN_CMASK8:
11860 case SPARC_BUILTIN_CMASK16:
11861 case SPARC_BUILTIN_CMASK32:
11862 break;
11864 default:
11865 return build_zero_cst (rtype);
11868 switch (code)
11870 case SPARC_BUILTIN_FEXPAND:
11871 arg0 = args[0];
11872 STRIP_NOPS (arg0);
11874 if (TREE_CODE (arg0) == VECTOR_CST)
11876 tree inner_type = TREE_TYPE (rtype);
11877 unsigned i;
11879 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11880 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11882 unsigned HOST_WIDE_INT val
11883 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11884 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11886 return n_elts.build ();
11888 break;
11890 case SPARC_BUILTIN_FMUL8X16:
11891 case SPARC_BUILTIN_FMUL8X16AU:
11892 case SPARC_BUILTIN_FMUL8X16AL:
11893 arg0 = args[0];
11894 arg1 = args[1];
11895 STRIP_NOPS (arg0);
11896 STRIP_NOPS (arg1);
11898 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11900 tree inner_type = TREE_TYPE (rtype);
11901 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11902 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11903 return n_elts.build ();
11905 break;
11907 case SPARC_BUILTIN_FPMERGE:
11908 arg0 = args[0];
11909 arg1 = args[1];
11910 STRIP_NOPS (arg0);
11911 STRIP_NOPS (arg1);
11913 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11915 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
11916 unsigned i;
11917 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11919 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
11920 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
11923 return n_elts.build ();
11925 break;
11927 case SPARC_BUILTIN_PDIST:
11928 case SPARC_BUILTIN_PDISTN:
11929 arg0 = args[0];
11930 arg1 = args[1];
11931 STRIP_NOPS (arg0);
11932 STRIP_NOPS (arg1);
11933 if (code == SPARC_BUILTIN_PDIST)
11935 arg2 = args[2];
11936 STRIP_NOPS (arg2);
11938 else
11939 arg2 = integer_zero_node;
11941 if (TREE_CODE (arg0) == VECTOR_CST
11942 && TREE_CODE (arg1) == VECTOR_CST
11943 && TREE_CODE (arg2) == INTEGER_CST)
11945 bool overflow = false;
11946 widest_int result = wi::to_widest (arg2);
11947 widest_int tmp;
11948 unsigned i;
11950 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11952 tree e0 = VECTOR_CST_ELT (arg0, i);
11953 tree e1 = VECTOR_CST_ELT (arg1, i);
11955 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
11957 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
11958 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
11959 if (wi::neg_p (tmp))
11960 tmp = wi::neg (tmp, &neg2_ovf);
11961 else
11962 neg2_ovf = wi::OVF_NONE;
11963 result = wi::add (result, tmp, SIGNED, &add2_ovf);
11964 overflow |= ((neg1_ovf != wi::OVF_NONE)
11965 | (neg2_ovf != wi::OVF_NONE)
11966 | (add1_ovf != wi::OVF_NONE)
11967 | (add2_ovf != wi::OVF_NONE));
11970 gcc_assert (!overflow);
11972 return wide_int_to_tree (rtype, result);
11975 default:
11976 break;
11979 return NULL_TREE;
11982 /* ??? This duplicates information provided to the compiler by the
11983 ??? scheduler description. Some day, teach genautomata to output
11984 ??? the latencies and then CSE will just use that. */
11986 static bool
11987 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
11988 int opno ATTRIBUTE_UNUSED,
11989 int *total, bool speed ATTRIBUTE_UNUSED)
11991 int code = GET_CODE (x);
11992 bool float_mode_p = FLOAT_MODE_P (mode);
11994 switch (code)
11996 case CONST_INT:
11997 if (SMALL_INT (x))
11998 *total = 0;
11999 else
12000 *total = 2;
12001 return true;
12003 case CONST_WIDE_INT:
12004 *total = 0;
12005 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12006 *total += 2;
12007 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12008 *total += 2;
12009 return true;
12011 case HIGH:
12012 *total = 2;
12013 return true;
12015 case CONST:
12016 case LABEL_REF:
12017 case SYMBOL_REF:
12018 *total = 4;
12019 return true;
12021 case CONST_DOUBLE:
12022 *total = 8;
12023 return true;
12025 case MEM:
12026 /* If outer-code was a sign or zero extension, a cost
12027 of COSTS_N_INSNS (1) was already added in. This is
12028 why we are subtracting it back out. */
12029 if (outer_code == ZERO_EXTEND)
12031 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12033 else if (outer_code == SIGN_EXTEND)
12035 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12037 else if (float_mode_p)
12039 *total = sparc_costs->float_load;
12041 else
12043 *total = sparc_costs->int_load;
12046 return true;
12048 case PLUS:
12049 case MINUS:
12050 if (float_mode_p)
12051 *total = sparc_costs->float_plusminus;
12052 else
12053 *total = COSTS_N_INSNS (1);
12054 return false;
12056 case FMA:
12058 rtx sub;
12060 gcc_assert (float_mode_p);
12061 *total = sparc_costs->float_mul;
12063 sub = XEXP (x, 0);
12064 if (GET_CODE (sub) == NEG)
12065 sub = XEXP (sub, 0);
12066 *total += rtx_cost (sub, mode, FMA, 0, speed);
12068 sub = XEXP (x, 2);
12069 if (GET_CODE (sub) == NEG)
12070 sub = XEXP (sub, 0);
12071 *total += rtx_cost (sub, mode, FMA, 2, speed);
12072 return true;
12075 case MULT:
12076 if (float_mode_p)
12077 *total = sparc_costs->float_mul;
12078 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12079 *total = COSTS_N_INSNS (25);
12080 else
12082 int bit_cost;
12084 bit_cost = 0;
12085 if (sparc_costs->int_mul_bit_factor)
12087 int nbits;
12089 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12091 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
12092 for (nbits = 0; value != 0; value &= value - 1)
12093 nbits++;
12095 else
12096 nbits = 7;
12098 if (nbits < 3)
12099 nbits = 3;
12100 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12101 bit_cost = COSTS_N_INSNS (bit_cost);
12104 if (mode == DImode || !TARGET_HARD_MUL)
12105 *total = sparc_costs->int_mulX + bit_cost;
12106 else
12107 *total = sparc_costs->int_mul + bit_cost;
12109 return false;
12111 case ASHIFT:
12112 case ASHIFTRT:
12113 case LSHIFTRT:
12114 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12115 return false;
12117 case DIV:
12118 case UDIV:
12119 case MOD:
12120 case UMOD:
12121 if (float_mode_p)
12123 if (mode == DFmode)
12124 *total = sparc_costs->float_div_df;
12125 else
12126 *total = sparc_costs->float_div_sf;
12128 else
12130 if (mode == DImode)
12131 *total = sparc_costs->int_divX;
12132 else
12133 *total = sparc_costs->int_div;
12135 return false;
12137 case NEG:
12138 if (! float_mode_p)
12140 *total = COSTS_N_INSNS (1);
12141 return false;
12143 /* FALLTHRU */
12145 case ABS:
12146 case FLOAT:
12147 case UNSIGNED_FLOAT:
12148 case FIX:
12149 case UNSIGNED_FIX:
12150 case FLOAT_EXTEND:
12151 case FLOAT_TRUNCATE:
12152 *total = sparc_costs->float_move;
12153 return false;
12155 case SQRT:
12156 if (mode == DFmode)
12157 *total = sparc_costs->float_sqrt_df;
12158 else
12159 *total = sparc_costs->float_sqrt_sf;
12160 return false;
12162 case COMPARE:
12163 if (float_mode_p)
12164 *total = sparc_costs->float_cmp;
12165 else
12166 *total = COSTS_N_INSNS (1);
12167 return false;
12169 case IF_THEN_ELSE:
12170 if (float_mode_p)
12171 *total = sparc_costs->float_cmove;
12172 else
12173 *total = sparc_costs->int_cmove;
12174 return false;
12176 case IOR:
12177 /* Handle the NAND vector patterns. */
12178 if (sparc_vector_mode_supported_p (mode)
12179 && GET_CODE (XEXP (x, 0)) == NOT
12180 && GET_CODE (XEXP (x, 1)) == NOT)
12182 *total = COSTS_N_INSNS (1);
12183 return true;
12185 else
12186 return false;
12188 default:
12189 return false;
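The variable multiply cost in the MULT case above charges for each significant bit of the constant operand: the inner loop is Kernighan's population count (value &= value - 1 clears the lowest set bit per iteration), a non-constant operand is assumed to have 7 significant bits, and the first 3 bits are free. A standalone restatement with hypothetical numbers:

/* Illustration only.  For value == 0xFF this returns 8, so a cost
   table with int_mul_bit_factor == 2 would add
   COSTS_N_INSNS ((8 - 3) / 2) == COSTS_N_INSNS (2) to int_mul.  */
static int
significant_mul_bits (unsigned HOST_WIDE_INT value)
{
  int nbits = 0;
  for (; value != 0; value &= value - 1)
    nbits++;
  return nbits < 3 ? 3 : nbits;
}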
12193 /* Return true if RCLASS is either GENERAL_REGS or I64_REGS. */
12195 static inline bool
12196 general_or_i64_p (reg_class_t rclass)
12198 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12201 /* Implement TARGET_REGISTER_MOVE_COST. */
12203 static int
12204 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12205 reg_class_t from, reg_class_t to)
12207 bool need_memory = false;
12209 /* This helps postreload CSE to eliminate redundant comparisons. */
12210 if (from == NO_REGS || to == NO_REGS)
12211 return 100;
12213 if (from == FPCC_REGS || to == FPCC_REGS)
12214 need_memory = true;
12215 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12216 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12218 if (TARGET_VIS3)
12220 int size = GET_MODE_SIZE (mode);
12221 if (size == 8 || size == 4)
12223 if (! TARGET_ARCH32 || size == 4)
12224 return 4;
12225 else
12226 return 6;
12229 need_memory = true;
12232 if (need_memory)
12234 if (sparc_cpu == PROCESSOR_ULTRASPARC
12235 || sparc_cpu == PROCESSOR_ULTRASPARC3
12236 || sparc_cpu == PROCESSOR_NIAGARA
12237 || sparc_cpu == PROCESSOR_NIAGARA2
12238 || sparc_cpu == PROCESSOR_NIAGARA3
12239 || sparc_cpu == PROCESSOR_NIAGARA4
12240 || sparc_cpu == PROCESSOR_NIAGARA7
12241 || sparc_cpu == PROCESSOR_M8)
12242 return 12;
12244 return 6;
12247 return 2;
12250 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12251 This is achieved by means of a manual dynamic stack space allocation in
12252 the current frame. We make the assumption that SEQ doesn't contain any
12253 function calls, with the possible exception of calls to the GOT helper. */
12255 static void
12256 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12258 /* We must preserve the lowest 16 words for the register save area. */
12259 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12260 /* We really need only 2 words of fresh stack space. */
12261 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12263 rtx slot
12264 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12265 SPARC_STACK_BIAS + offset));
12267 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12268 emit_insn (gen_rtx_SET (slot, reg));
12269 if (reg2)
12270 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12271 reg2));
12272 emit_insn (seq);
12273 if (reg2)
12274 emit_insn (gen_rtx_SET (reg2,
12275 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12276 emit_insn (gen_rtx_SET (reg, slot));
12277 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
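The spill slots sit just above the register window save area that the ABI reserves at the stack pointer; a sketch of the layout while SEQ runs (assuming a 32-bit frame, so UNITS_PER_WORD == 4):

/* Illustrative layout, offsets relative to %sp + SPARC_STACK_BIAS:
     +0  .. +63   16-word register window save area (untouched)
     +64          REG spill slot
     +68          REG2 spill slot (if REG2 is nonzero)  */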
12280 /* Output the assembler code for a thunk function. THUNK_DECL is the
12281 declaration for the thunk function itself, FUNCTION is the decl for
12282 the target function. DELTA is an immediate constant offset to be
12283 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12284 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
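In other words, the thunk adjusts the incoming this pointer and tail-calls FUNCTION. A C-level sketch of the semantics (illustrative only; the hook emits assembly directly):

static void *
thunk_adjust_this (void *this_ptr, long delta, long vcall_offset)
{
  char *p = (char *) this_ptr + delta;
  if (vcall_offset != 0)
    /* Fetch the adjustment stored at *THIS + VCALL_OFFSET.  */
    p += *(long *) (*(char **) p + vcall_offset);
  return p;  /* FUNCTION is then tail-called with this pointer */
}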
12286 static void
12287 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12288 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12289 tree function)
12291 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12292 rtx this_rtx, funexp;
12293 rtx_insn *insn;
12294 unsigned int int_arg_first;
12296 reload_completed = 1;
12297 epilogue_completed = 1;
12299 emit_note (NOTE_INSN_PROLOGUE_END);
12301 if (TARGET_FLAT)
12303 sparc_leaf_function_p = 1;
12305 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12307 else if (flag_delayed_branch)
12309 /* We will emit a regular sibcall below, so we need to instruct
12310 output_sibcall that we are in a leaf function. */
12311 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12313 /* This will cause final.c to invoke leaf_renumber_regs so we
12314 must behave as if we were in a not-yet-leafified function. */
12315 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12317 else
12319 /* We will emit the sibcall manually below, so we will need to
12320 manually spill non-leaf registers. */
12321 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12323 /* We really are in a leaf function. */
12324 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12327 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12328 returns a structure, the structure return pointer is there instead. */
12329 if (TARGET_ARCH64
12330 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12331 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12332 else
12333 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12335 /* Add DELTA. When possible use a plain add, otherwise load it into
12336 a register first. */
12337 if (delta)
12339 rtx delta_rtx = GEN_INT (delta);
12341 if (! SPARC_SIMM13_P (delta))
12343 rtx scratch = gen_rtx_REG (Pmode, 1);
12344 emit_move_insn (scratch, delta_rtx);
12345 delta_rtx = scratch;
12348 /* THIS_RTX += DELTA. */
12349 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12352 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12353 if (vcall_offset)
12355 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12356 rtx scratch = gen_rtx_REG (Pmode, 1);
12358 gcc_assert (vcall_offset < 0);
12360 /* SCRATCH = *THIS_RTX. */
12361 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12363 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12364 may not have any available scratch register at this point. */
12365 if (SPARC_SIMM13_P (vcall_offset))
12367 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12368 else if (! fixed_regs[5]
12369 /* The below sequence is made up of at least 2 insns,
12370 while the default method may need only one. */
12371 && vcall_offset < -8192)
12373 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12374 emit_move_insn (scratch2, vcall_offset_rtx);
12375 vcall_offset_rtx = scratch2;
12377 else
12379 rtx increment = GEN_INT (-4096);
12381 /* VCALL_OFFSET is a negative number whose typical range can be
12382 estimated as -32768..0 in 32-bit mode. In almost all cases
12383 it is therefore cheaper to emit multiple add insns than
12384 spilling and loading the constant into a register (at least
12385 6 insns). */
12386 while (! SPARC_SIMM13_P (vcall_offset))
12388 emit_insn (gen_add2_insn (scratch, increment));
12389 vcall_offset += 4096;
12391 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12394 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12395 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12396 gen_rtx_PLUS (Pmode,
12397 scratch,
12398 vcall_offset_rtx)));
12400 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12401 emit_insn (gen_add2_insn (this_rtx, scratch));
12404 /* Generate a tail call to the target function. */
12405 if (! TREE_USED (function))
12407 assemble_external (function);
12408 TREE_USED (function) = 1;
12410 funexp = XEXP (DECL_RTL (function), 0);
12412 if (flag_delayed_branch)
12414 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12415 insn = emit_call_insn (gen_sibcall (funexp));
12416 SIBLING_CALL_P (insn) = 1;
12418 else
12420 /* The hoops we have to jump through in order to generate a sibcall
12421 without using delay slots... */
12422 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12424 if (flag_pic)
12426 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12427 start_sequence ();
12428 load_got_register (); /* clobbers %o7 */
12429 if (!TARGET_VXWORKS_RTP)
12430 pic_offset_table_rtx = got_register_rtx;
12431 scratch = sparc_legitimize_pic_address (funexp, scratch);
12432 seq = get_insns ();
12433 end_sequence ();
12434 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12436 else if (TARGET_ARCH32)
12438 emit_insn (gen_rtx_SET (scratch,
12439 gen_rtx_HIGH (SImode, funexp)));
12440 emit_insn (gen_rtx_SET (scratch,
12441 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12443 else /* TARGET_ARCH64 */
12445 switch (sparc_code_model)
12447 case CM_MEDLOW:
12448 case CM_MEDMID:
12449 /* The destination can serve as a temporary. */
12450 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12451 break;
12453 case CM_MEDANY:
12454 case CM_EMBMEDANY:
12455 /* The destination cannot serve as a temporary. */
12456 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12457 start_sequence ();
12458 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12459 seq = get_insns ();
12460 end_sequence ();
12461 emit_and_preserve (seq, spill_reg, 0);
12462 break;
12464 default:
12465 gcc_unreachable ();
12469 emit_jump_insn (gen_indirect_jump (scratch));
12472 emit_barrier ();
12474 /* Run just enough of rest_of_compilation to get the insns emitted.
12475 There's not really enough bulk here to make other passes such as
12476 instruction scheduling worth while. */
12477 insn = get_insns ();
12478 shorten_branches (insn);
12479 assemble_start_function (thunk_fndecl, fnname);
12480 final_start_function (insn, file, 1);
12481 final (insn, file, 1);
12482 final_end_function ();
12483 assemble_end_function (thunk_fndecl, fnname);
12485 reload_completed = 0;
12486 epilogue_completed = 0;
12489 /* Return true if sparc_output_mi_thunk would be able to output the
12490 assembler code for the thunk function specified by the arguments
12491 it is passed, and false otherwise. */
12492 static bool
12493 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12494 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12495 HOST_WIDE_INT vcall_offset,
12496 const_tree function ATTRIBUTE_UNUSED)
12498 /* Bound the loop used in the default method above. */
12499 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12502 /* How to allocate a 'struct machine_function'. */
12504 static struct machine_function *
12505 sparc_init_machine_status (void)
12507 return ggc_cleared_alloc<machine_function> ();
12510 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12512 static unsigned HOST_WIDE_INT
12513 sparc_asan_shadow_offset (void)
12515 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
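These constants feed the standard ASan shadow mapping, shadow = (addr >> 3) + offset. A one-line illustration for the 64-bit case:

/* Illustration only: shadow-byte address for a TARGET_ARCH64
   application address.  */
static unsigned long
asan_shadow_addr (unsigned long addr)
{
  return (addr >> 3) + (1UL << 43);
}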
12518 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12519 We need to emit DTP-relative relocations. */
12521 static void
12522 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12524 switch (size)
12526 case 4:
12527 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12528 break;
12529 case 8:
12530 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12531 break;
12532 default:
12533 gcc_unreachable ();
12535 output_addr_const (file, x);
12536 fputs (")", file);
12539 /* Do whatever processing is required at the end of a file. */
12541 static void
12542 sparc_file_end (void)
12544 /* If we need to emit the special GOT helper function, do so now. */
12545 if (got_helper_needed)
12547 const char *name = XSTR (got_helper_rtx, 0);
12548 #ifdef DWARF2_UNWIND_INFO
12549 bool do_cfi;
12550 #endif
12552 if (USE_HIDDEN_LINKONCE)
12554 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12555 get_identifier (name),
12556 build_function_type_list (void_type_node,
12557 NULL_TREE));
12558 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12559 NULL_TREE, void_type_node);
12560 TREE_PUBLIC (decl) = 1;
12561 TREE_STATIC (decl) = 1;
12562 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12563 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12564 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12565 resolve_unique_section (decl, 0, flag_function_sections);
12566 allocate_struct_function (decl, true);
12567 cfun->is_thunk = 1;
12568 current_function_decl = decl;
12569 init_varasm_status ();
12570 assemble_start_function (decl, name);
12572 else
12574 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12575 switch_to_section (text_section);
12576 if (align > 0)
12577 ASM_OUTPUT_ALIGN (asm_out_file, align);
12578 ASM_OUTPUT_LABEL (asm_out_file, name);
12581 #ifdef DWARF2_UNWIND_INFO
12582 do_cfi = dwarf2out_do_cfi_asm ();
12583 if (do_cfi)
12584 output_asm_insn (".cfi_startproc", NULL);
12585 #endif
12586 if (flag_delayed_branch)
12588 output_asm_insn ("jmp\t%%o7+8", NULL);
12589 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12591 else
12593 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12594 output_asm_insn ("jmp\t%%o7+8", NULL);
12595 output_asm_insn (" nop", NULL);
12597 #ifdef DWARF2_UNWIND_INFO
12598 if (do_cfi)
12599 output_asm_insn (".cfi_endproc", NULL);
12600 #endif
12603 if (NEED_INDICATE_EXEC_STACK)
12604 file_end_indicate_exec_stack ();
12606 #ifdef TARGET_SOLARIS
12607 solaris_file_end ();
12608 #endif
12611 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12612 /* Implement TARGET_MANGLE_TYPE. */
12614 static const char *
12615 sparc_mangle_type (const_tree type)
12617 if (TARGET_ARCH32
12618 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12619 && TARGET_LONG_DOUBLE_128)
12620 return "g";
12622 /* For all other types, use normal C++ mangling. */
12623 return NULL;
12625 #endif
12627 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12628 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12629 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
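For example, the compare-and-swap expander further down calls this with LOAD_STORE == 3 (the access acts as both load and store) and BEFORE_AFTER == 1, then 2, to fence both sides of the operation. A minimal illustrative caller:

/* Fence an acquire load: LOAD_STORE == 1 (load only),
   BEFORE_AFTER == 2 (barrier after the access).  */
static void
emit_acquire_load_fence (void)
{
  sparc_emit_membar_for_model (MEMMODEL_ACQUIRE, 1, 2);
}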
12631 void
12632 sparc_emit_membar_for_model (enum memmodel model,
12633 int load_store, int before_after)
12635 /* Bits for the MEMBAR mmask field. */
12636 const int LoadLoad = 1;
12637 const int StoreLoad = 2;
12638 const int LoadStore = 4;
12639 const int StoreStore = 8;
12641 int mm = 0, implied = 0;
12643 switch (sparc_memory_model)
12645 case SMM_SC:
12646 /* Sequential Consistency. All memory transactions are immediately
12647 visible in sequential execution order. No barriers needed. */
12648 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12649 break;
12651 case SMM_TSO:
12652 /* Total Store Ordering: all memory transactions with store semantics
12653 are followed by an implied StoreStore. */
12654 implied |= StoreStore;
12656 /* If we're not looking for a raw barrier (before+after), then atomic
12657 operations get the benefit of being both load and store. */
12658 if (load_store == 3 && before_after == 1)
12659 implied |= StoreLoad;
12660 /* FALLTHRU */
12662 case SMM_PSO:
12663 /* Partial Store Ordering: all memory transactions with load semantics
12664 are followed by an implied LoadLoad | LoadStore. */
12665 implied |= LoadLoad | LoadStore;
12667 /* If we're not looking for a raw barrier (before+after), then atomic
12668 operations get the benefit of being both load and store. */
12669 if (load_store == 3 && before_after == 2)
12670 implied |= StoreLoad | StoreStore;
12671 /* FALLTHRU */
12673 case SMM_RMO:
12674 /* Relaxed Memory Ordering: no implicit bits. */
12675 break;
12677 default:
12678 gcc_unreachable ();
12681 if (before_after & 1)
12683 if (is_mm_release (model) || is_mm_acq_rel (model)
12684 || is_mm_seq_cst (model))
12686 if (load_store & 1)
12687 mm |= LoadLoad | StoreLoad;
12688 if (load_store & 2)
12689 mm |= LoadStore | StoreStore;
12692 if (before_after & 2)
12694 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12695 || is_mm_seq_cst (model))
12697 if (load_store & 1)
12698 mm |= LoadLoad | LoadStore;
12699 if (load_store & 2)
12700 mm |= StoreLoad | StoreStore;
12704 /* Remove the bits implied by the system memory model. */
12705 mm &= ~implied;
12707 /* For raw barriers (before+after), always emit a barrier.
12708 This will become a compile-time barrier if needed. */
12709 if (mm || before_after == 3)
12710 emit_insn (gen_membar (GEN_INT (mm)));
12713 /* Expand code to perform an 8-bit or 16-bit compare-and-swap by doing
12714 a 32-bit compare-and-swap on the word containing the byte or half-word. */
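The strategy is the classic word-sized emulation: align the address down to the containing 32-bit word, build a shifted mask for the subword, splice the expected and new values into the unchanged bytes, and retry whenever only the surrounding bytes moved. A plain-C sketch using little-endian-style byte numbering; the RTL below additionally XORs the byte offset to obtain SPARC's big-endian lane:

/* Illustrative sketch, not compiler code.  */
static unsigned char
cas_byte_sketch (unsigned char *p, unsigned char oldv, unsigned char newv)
{
  unsigned int *wp = (unsigned int *) ((unsigned long) p & ~3UL);
  int shift = ((unsigned long) p & 3) * 8;
  unsigned int mask = 0xffu << shift;
  unsigned int word = *wp;

  for (;;)
    {
      unsigned int expect = (word & ~mask) | ((unsigned int) oldv << shift);
      unsigned int merged = (word & ~mask) | ((unsigned int) newv << shift);
      unsigned int seen = __sync_val_compare_and_swap (wp, expect, merged);
      if (seen == expect)
	return oldv;                    /* success */
      if ((seen & mask) != ((unsigned int) oldv << shift))
	return (seen & mask) >> shift;  /* genuine value mismatch */
      word = seen;                      /* neighbors changed: retry */
    }
}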
12716 static void
12717 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12718 rtx oldval, rtx newval)
12720 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12721 rtx addr = gen_reg_rtx (Pmode);
12722 rtx off = gen_reg_rtx (SImode);
12723 rtx oldv = gen_reg_rtx (SImode);
12724 rtx newv = gen_reg_rtx (SImode);
12725 rtx oldvalue = gen_reg_rtx (SImode);
12726 rtx newvalue = gen_reg_rtx (SImode);
12727 rtx res = gen_reg_rtx (SImode);
12728 rtx resv = gen_reg_rtx (SImode);
12729 rtx memsi, val, mask, cc;
12731 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12733 if (Pmode != SImode)
12734 addr1 = gen_lowpart (SImode, addr1);
12735 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12737 memsi = gen_rtx_MEM (SImode, addr);
12738 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12739 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12741 val = copy_to_reg (memsi);
12743 emit_insn (gen_rtx_SET (off,
12744 gen_rtx_XOR (SImode, off,
12745 GEN_INT (GET_MODE (mem) == QImode
12746 ? 3 : 2))));
12748 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12750 if (GET_MODE (mem) == QImode)
12751 mask = force_reg (SImode, GEN_INT (0xff));
12752 else
12753 mask = force_reg (SImode, GEN_INT (0xffff));
12755 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12757 emit_insn (gen_rtx_SET (val,
12758 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12759 val)));
12761 oldval = gen_lowpart (SImode, oldval);
12762 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12764 newval = gen_lowpart_common (SImode, newval);
12765 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12767 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12769 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12771 rtx_code_label *end_label = gen_label_rtx ();
12772 rtx_code_label *loop_label = gen_label_rtx ();
12773 emit_label (loop_label);
12775 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12777 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12779 emit_move_insn (bool_result, const1_rtx);
12781 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12783 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12785 emit_insn (gen_rtx_SET (resv,
12786 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12787 res)));
12789 emit_move_insn (bool_result, const0_rtx);
12791 cc = gen_compare_reg_1 (NE, resv, val);
12792 emit_insn (gen_rtx_SET (val, resv));
12794 /* Use cbranchcc4 to separate the compare and branch! */
12795 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12796 cc, const0_rtx, loop_label));
12798 emit_label (end_label);
12800 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12802 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12804 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
12807 /* Expand code to perform a compare-and-swap. */
12809 void
12810 sparc_expand_compare_and_swap (rtx operands[])
12812 rtx bval, retval, mem, oldval, newval;
12813 machine_mode mode;
12814 enum memmodel model;
12816 bval = operands[0];
12817 retval = operands[1];
12818 mem = operands[2];
12819 oldval = operands[3];
12820 newval = operands[4];
12821 model = (enum memmodel) INTVAL (operands[6]);
12822 mode = GET_MODE (mem);
12824 sparc_emit_membar_for_model (model, 3, 1);
12826 if (reg_overlap_mentioned_p (retval, oldval))
12827 oldval = copy_to_reg (oldval);
12829 if (mode == QImode || mode == HImode)
12830 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12831 else
12833 rtx (*gen) (rtx, rtx, rtx, rtx);
12834 rtx x;
12836 if (mode == SImode)
12837 gen = gen_atomic_compare_and_swapsi_1;
12838 else
12839 gen = gen_atomic_compare_and_swapdi_1;
12840 emit_insn (gen (retval, mem, oldval, newval));
12842 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12843 if (x != bval)
12844 convert_move (bval, x, 1);
12847 sparc_emit_membar_for_model (model, 3, 2);
12850 void
12851 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12853 rtx t_1, t_2, t_3;
12855 sel = gen_lowpart (DImode, sel);
12856 switch (vmode)
12858 case E_V2SImode:
12859 /* inp = xxxxxxxAxxxxxxxB */
12860 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12861 NULL_RTX, 1, OPTAB_DIRECT);
12862 /* t_1 = ....xxxxxxxAxxx. */
12863 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12864 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12865 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12866 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12867 /* sel = .......B */
12868 /* t_1 = ...A.... */
12869 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12870 /* sel = ...A...B */
12871 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12872 /* sel = AAAABBBB * 4 */
12873 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12874 /* sel = { A*4, A*4+1, A*4+2, ... } */
12875 break;
12877 case E_V4HImode:
12878 /* inp = xxxAxxxBxxxCxxxD */
12879 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12880 NULL_RTX, 1, OPTAB_DIRECT);
12881 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12882 NULL_RTX, 1, OPTAB_DIRECT);
12883 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12884 NULL_RTX, 1, OPTAB_DIRECT);
12885 /* t_1 = ..xxxAxxxBxxxCxx */
12886 /* t_2 = ....xxxAxxxBxxxC */
12887 /* t_3 = ......xxxAxxxBxx */
12888 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12889 GEN_INT (0x07),
12890 NULL_RTX, 1, OPTAB_DIRECT);
12891 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12892 GEN_INT (0x0700),
12893 NULL_RTX, 1, OPTAB_DIRECT);
12894 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
12895 GEN_INT (0x070000),
12896 NULL_RTX, 1, OPTAB_DIRECT);
12897 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
12898 GEN_INT (0x07000000),
12899 NULL_RTX, 1, OPTAB_DIRECT);
12900 /* sel = .......D */
12901 /* t_1 = .....C.. */
12902 /* t_2 = ...B.... */
12903 /* t_3 = .A...... */
12904 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12905 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
12906 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
12907 /* sel = .A.B.C.D */
12908 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
12909 /* sel = AABBCCDD * 2 */
12910 t_1 = force_reg (SImode, GEN_INT (0x01010101));
12911 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
12912 break;
12914 case E_V8QImode:
12915 /* input = xAxBxCxDxExFxGxH */
12916 sel = expand_simple_binop (DImode, AND, sel,
12917 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
12918 | 0x0f0f0f0f),
12919 NULL_RTX, 1, OPTAB_DIRECT);
12920 /* sel = .A.B.C.D.E.F.G.H */
12921 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
12922 NULL_RTX, 1, OPTAB_DIRECT);
12923 /* t_1 = ..A.B.C.D.E.F.G. */
12924 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12925 NULL_RTX, 1, OPTAB_DIRECT);
12926 /* sel = .AABBCCDDEEFFGGH */
12927 sel = expand_simple_binop (DImode, AND, sel,
12928 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
12929 | 0xff00ff),
12930 NULL_RTX, 1, OPTAB_DIRECT);
12931 /* sel = ..AB..CD..EF..GH */
12932 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12933 NULL_RTX, 1, OPTAB_DIRECT);
12934 /* t_1 = ....AB..CD..EF.. */
12935 sel = expand_simple_binop (DImode, IOR, sel, t_1,
12936 NULL_RTX, 1, OPTAB_DIRECT);
12937 /* sel = ..ABABCDCDEFEFGH */
12938 sel = expand_simple_binop (DImode, AND, sel,
12939 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
12940 NULL_RTX, 1, OPTAB_DIRECT);
12941 /* sel = ....ABCD....EFGH */
12942 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12943 NULL_RTX, 1, OPTAB_DIRECT);
12944 /* t_1 = ........ABCD.... */
12945 sel = gen_lowpart (SImode, sel);
12946 t_1 = gen_lowpart (SImode, t_1);
12947 break;
12949 default:
12950 gcc_unreachable ();
12953 /* Always perform the final addition/merge within the bmask insn. */
12954 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
12957 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
12959 static bool
12960 sparc_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0,
12961 rtx op1, const vec_perm_indices &sel)
12963 if (!TARGET_VIS2)
12964 return false;
12966 /* All permutes are supported. */
12967 if (!target)
12968 return true;
12970 /* Force target-independent code to convert constant permutations on other
12971 modes down to V8QI. Rely on this to avoid the complexity of the byte
12972 order of the permutation. */
12973 if (vmode != V8QImode)
12974 return false;
12976 unsigned int i, mask;
12977 for (i = mask = 0; i < 8; ++i)
12978 mask |= (sel[i] & 0xf) << (28 - i*4);
12979 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
12981 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
12982 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
12983 return true;
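The loop above packs each selector element into one nibble of the 32-bit BMASK operand, most significant nibble first, so the identity permutation {0,1,...,7} yields 0x01234567. A plain-C restatement of the packing:

/* Illustration only.  */
static unsigned int
pack_bmask (const unsigned char sel[8])
{
  unsigned int i, mask = 0;
  for (i = 0; i < 8; ++i)
    mask |= (unsigned int) (sel[i] & 0xf) << (28 - i * 4);
  return mask;  /* {0,1,2,3,4,5,6,7}  =>  0x01234567 */
}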
12986 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
12988 static bool
12989 sparc_frame_pointer_required (void)
12991 /* If the stack pointer is dynamically modified in the function, it cannot
12992 serve as the frame pointer. */
12993 if (cfun->calls_alloca)
12994 return true;
12996 /* If the function receives nonlocal gotos, it needs to save the frame
12997 pointer in the nonlocal_goto_save_area object. */
12998 if (cfun->has_nonlocal_label)
12999 return true;
13001 /* In flat mode, that's it. */
13002 if (TARGET_FLAT)
13003 return false;
13005 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13006 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13007 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13010 /* The way this is structured, we can't eliminate SFP in favor of SP
13011 if the frame pointer is required: we want to use the SFP->HFP elimination
13012 in that case. But the test in update_eliminables doesn't know we are
13013 assuming below that we only do the former elimination. */
13015 static bool
13016 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13018 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13021 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13022 they won't be allocated. */
13024 static void
13025 sparc_conditional_register_usage (void)
13027 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13028 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13029 /* If the user has passed -f{fixed,call-{used,saved}}-g5, then
13030 honor it. */
13031 if (TARGET_ARCH32 && fixed_regs[5])
13032 fixed_regs[5] = 1;
13033 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13034 fixed_regs[5] = 0;
13035 if (! TARGET_V9)
13037 int regno;
13038 for (regno = SPARC_FIRST_V9_FP_REG;
13039 regno <= SPARC_LAST_V9_FP_REG;
13040 regno++)
13041 fixed_regs[regno] = 1;
13042 /* %fcc0 is used by v8 and v9. */
13043 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13044 regno <= SPARC_LAST_V9_FCC_REG;
13045 regno++)
13046 fixed_regs[regno] = 1;
13048 if (! TARGET_FPU)
13050 int regno;
13051 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13052 fixed_regs[regno] = 1;
13054 /* If the user has passed -f{fixed,call-{used,saved}}-g2, then
13055 honor it. Likewise with g3 and g4. */
13056 if (fixed_regs[2] == 2)
13057 fixed_regs[2] = ! TARGET_APP_REGS;
13058 if (fixed_regs[3] == 2)
13059 fixed_regs[3] = ! TARGET_APP_REGS;
13060 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13061 fixed_regs[4] = ! TARGET_APP_REGS;
13062 else if (TARGET_CM_EMBMEDANY)
13063 fixed_regs[4] = 1;
13064 else if (fixed_regs[4] == 2)
13065 fixed_regs[4] = 0;
13066 if (TARGET_FLAT)
13068 int regno;
13069 /* Disable leaf functions. */
13070 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13071 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
13072 leaf_reg_remap [regno] = regno;
13074 if (TARGET_VIS)
13075 global_regs[SPARC_GSR_REG] = 1;
13078 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13080 static bool
13081 sparc_use_pseudo_pic_reg (void)
13083 return !TARGET_VXWORKS_RTP && flag_pic;
13086 /* Implement TARGET_INIT_PIC_REG. */
13088 static void
13089 sparc_init_pic_reg (void)
13091 edge entry_edge;
13092 rtx_insn *seq;
13094 /* In PIC mode, we always need to initialize the PIC register if optimization
13095 is enabled, because we are called from IRA, and LRA may later force things
13096 to the constant pool for optimization purposes. */
13097 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13098 return;
13100 start_sequence ();
13101 load_got_register ();
13102 if (!TARGET_VXWORKS_RTP)
13103 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13104 seq = get_insns ();
13105 end_sequence ();
13107 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13108 insert_insn_on_edge (seq, entry_edge);
13109 commit_one_edge_insertion (entry_edge);
13112 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13114 - We can't load constants into FP registers.
13115 - We can't load FP constants into integer registers when soft-float,
13116 because there is no soft-float pattern with an r/F constraint.
13117 - We can't load FP constants into integer registers for TFmode unless
13118 it is 0.0L, because there is no movtf pattern with an r/F constraint.
13119 - Try to reload integer constants (symbolic or otherwise) back into
13120 registers directly, rather than having them dumped to memory. */
13122 static reg_class_t
13123 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13125 machine_mode mode = GET_MODE (x);
13126 if (CONSTANT_P (x))
13128 if (FP_REG_CLASS_P (rclass)
13129 || rclass == GENERAL_OR_FP_REGS
13130 || rclass == GENERAL_OR_EXTRA_FP_REGS
13131 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13132 || (mode == TFmode && ! const_zero_operand (x, mode)))
13133 return NO_REGS;
13135 if (GET_MODE_CLASS (mode) == MODE_INT)
13136 return GENERAL_REGS;
13138 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13140 if (! FP_REG_CLASS_P (rclass)
13141 || !(const_zero_operand (x, mode)
13142 || const_all_ones_operand (x, mode)))
13143 return NO_REGS;
13147 if (TARGET_VIS3
13148 && ! TARGET_ARCH64
13149 && (rclass == EXTRA_FP_REGS
13150 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13152 int regno = true_regnum (x);
13154 if (SPARC_INT_REG_P (regno))
13155 return (rclass == EXTRA_FP_REGS
13156 ? FP_REGS : GENERAL_OR_FP_REGS);
13159 return rclass;
13162 /* Return true if we use LRA instead of the reload pass. */
13164 static bool
13165 sparc_lra_p (void)
13167 return TARGET_LRA;
13170 /* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
13171 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
13173 const char *
13174 output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
13176 char mulstr[32];
13178 gcc_assert (! TARGET_ARCH64);
13180 if (sparc_check_64 (operands[1], insn) <= 0)
13181 output_asm_insn ("srl\t%L1, 0, %L1", operands);
13182 if (which_alternative == 1)
13183 output_asm_insn ("sllx\t%H1, 32, %H1", operands);
13184 if (GET_CODE (operands[2]) == CONST_INT)
13186 if (which_alternative == 1)
13188 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13189 sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
13190 output_asm_insn (mulstr, operands);
13191 return "srlx\t%L0, 32, %H0";
13193 else
13195 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13196 output_asm_insn ("or\t%L1, %3, %3", operands);
13197 sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
13198 output_asm_insn (mulstr, operands);
13199 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13200 return "mov\t%3, %L0";
13203 else if (rtx_equal_p (operands[1], operands[2]))
13205 if (which_alternative == 1)
13207 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13208 sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
13209 output_asm_insn (mulstr, operands);
13210 return "srlx\t%L0, 32, %H0";
13212 else
13214 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13215 output_asm_insn ("or\t%L1, %3, %3", operands);
13216 sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
13217 output_asm_insn (mulstr, operands);
13218 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13219 return "mov\t%3, %L0";
13222 if (sparc_check_64 (operands[2], insn) <= 0)
13223 output_asm_insn ("srl\t%L2, 0, %L2", operands);
13224 if (which_alternative == 1)
13226 output_asm_insn ("or\t%L1, %H1, %H1", operands);
13227 output_asm_insn ("sllx\t%H2, 32, %L1", operands);
13228 output_asm_insn ("or\t%L2, %L1, %L1", operands);
13229 sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
13230 output_asm_insn (mulstr, operands);
13231 return "srlx\t%L0, 32, %H0";
13233 else
13235 output_asm_insn ("sllx\t%H1, 32, %3", operands);
13236 output_asm_insn ("sllx\t%H2, 32, %4", operands);
13237 output_asm_insn ("or\t%L1, %3, %3", operands);
13238 output_asm_insn ("or\t%L2, %4, %4", operands);
13239 sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
13240 output_asm_insn (mulstr, operands);
13241 output_asm_insn ("srlx\t%3, 32, %H0", operands);
13242 return "mov\t%3, %L0";
13246 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13247 all fields of TARGET to ELT by means of the VIS2 BSHUFFLE insn. MODE
13248 and INNER_MODE are the modes describing TARGET. */
13250 static void
13251 vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
13252 machine_mode inner_mode)
13254 rtx t1, final_insn, sel;
13255 int bmask;
13257 t1 = gen_reg_rtx (mode);
13259 elt = convert_modes (SImode, inner_mode, elt, true);
13260 emit_move_insn (gen_lowpart(SImode, t1), elt);
13262 switch (mode)
13264 case E_V2SImode:
13265 final_insn = gen_bshufflev2si_vis (target, t1, t1);
13266 bmask = 0x45674567;
13267 break;
13268 case E_V4HImode:
13269 final_insn = gen_bshufflev4hi_vis (target, t1, t1);
13270 bmask = 0x67676767;
13271 break;
13272 case E_V8QImode:
13273 final_insn = gen_bshufflev8qi_vis (target, t1, t1);
13274 bmask = 0x77777777;
13275 break;
13276 default:
13277 gcc_unreachable ();
13280 sel = force_reg (SImode, GEN_INT (bmask));
13281 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
13282 emit_insn (final_insn);
13285 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13286 all fields of TARGET to ELT in V8QI by means of the VIS FPMERGE insn. */
13288 static void
13289 vector_init_fpmerge (rtx target, rtx elt)
13291 rtx t1, t2, t2_low, t3, t3_low;
13293 t1 = gen_reg_rtx (V4QImode);
13294 elt = convert_modes (SImode, QImode, elt, true);
13295 emit_move_insn (gen_lowpart (SImode, t1), elt);
13297 t2 = gen_reg_rtx (V8QImode);
13298 t2_low = gen_lowpart (V4QImode, t2);
13299 emit_insn (gen_fpmerge_vis (t2, t1, t1));
13301 t3 = gen_reg_rtx (V8QImode);
13302 t3_low = gen_lowpart (V4QImode, t3);
13303 emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));
13305 emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
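Each FPMERGE interleaves its two 4-byte inputs byte by byte, so merging a value with itself doubles the replicated byte's footprint; three rounds carry one byte into all eight lanes. An illustrative trace (big-endian byte order, EE the element byte):

/* t1                       = 00 00 00 EE
   t2  = fpmerge (t1,  t1)  = 00 00 00 00 00 00 EE EE
   t3  = fpmerge (t2l, t2l) = 00 00 00 00 EE EE EE EE
   tgt = fpmerge (t3l, t3l) = EE EE EE EE EE EE EE EE  */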
13308 /* Subroutine of sparc_expand_vector_init. Emit code to initialize
13309 all fields of TARGET to ELT in V4HI by means of the VIS FALIGNDATA insn. */
13311 static void
13312 vector_init_faligndata (rtx target, rtx elt)
13314 rtx t1 = gen_reg_rtx (V4HImode);
13315 int i;
13317 elt = convert_modes (SImode, HImode, elt, true);
13318 emit_move_insn (gen_lowpart (SImode, t1), elt);
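
  /* Set the GSR alignment offset to 6, so that each FALIGNDATA below
     extracts the 8 bytes starting at byte 6 of the concatenation
     T1:TARGET, i.e. the low halfword of T1 (which holds ELT) followed
     by the upper three halfwords of TARGET.  Four such rounds shift
     ELT into every field of TARGET.  */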
  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
				  force_reg (SImode, GEN_INT (6)),
				  const0_rtx));

  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}

/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }
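
  /* A fully constant vector needs no code at all: emit it as a
     CONST_VECTOR and let the move patterns materialize it.  */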
  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }
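
  /* Splatting one variable element into an 8-byte vector can be done
     entirely in FP registers: BSHUFFLE handles every element width on
     VIS2, while plain VIS falls back to FPMERGE for bytes and
     FALIGNDATA for halfwords.  */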
  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }
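
  /* Otherwise build the vector in a stack temporary, one element at a
     time, and load the result back into TARGET.  */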
  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine
     puts a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;
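
  /* In the medium/anywhere code models, loading a symbolic address
     needs a scratch register to build the address in; point reload at
     the reload_in<mode>/reload_out<mode> expanders, which supply one.  */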
  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   On SPARC, moving data between GENERAL_REGS and FP_REGS requires an
   intermediate memory slot unless VIS3 provides a direct path.  */

static bool
sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
			       reg_class_t class2)
{
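  /* VIS3 added movstouw/movwtos and movxtod/movdtox, which move 4-byte
     and 8-byte values directly between the two register files; anything
     outside that size range still has to go through memory.  */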
  return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
	  && (! TARGET_VIS3
	      || GET_MODE_SIZE (mode) > 8
	      || GET_MODE_SIZE (mode) < 4));
}

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens its argument to BITS_PER_WORD, which loses
   on v9 because the movsi and movsf patterns don't handle r/f moves.
   For v8 we copy the default definition.  */

static machine_mode
sparc_secondary_memory_needed_mode (machine_mode mode)
{
  if (TARGET_ARCH64)
    {
      if (GET_MODE_BITSIZE (mode) < 32)
	return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
      return mode;
    }
  else
    {
      if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
	return mode_for_size (BITS_PER_WORD,
			      GET_MODE_CLASS (mode), 0).require ();
      return mode;
    }
}

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);
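
  /* A conditional move only overwrites DST when the condition holds, so
     arrange for DST to hold the "false" value up front: either copy
     OPERANDS[3] into it, or, if OPERANDS[2] already occupies DST, swap
     the operands and reverse the condition.  */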
  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}

/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;
  enum rtx_code code = GET_CODE (operands[3]);

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      rtx t;

      code = swap_condition (code);
      t = cop0; cop0 = cop1; cop1 = t;
    }
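
  /* The emitted sequence is: a VIS compare (FCODE) writes a per-field
     bit-mask into MASK; BMASK (CCODE) expands that mask into a byte
     selection pattern in the GSR; BSHUFFLE then assembles each field
     of the result from OPERANDS[1] or OPERANDS[2] accordingly.  */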
  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}

/* On SPARC, any mode which naturally allocates into the float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  int size = UNITS_PER_WORD;

  if (TARGET_ARCH64)
    {
      enum mode_class mclass = GET_MODE_CLASS (mode);

      if (mclass == MODE_FLOAT || mclass == MODE_VECTOR_INT)
	size = 4;
    }

  return size;
}

/* Implement TARGET_HARD_REGNO_NREGS.

   On SPARC, ordinary registers hold 32 bits worth; this means both
   integer and floating point registers.  On v9, integer regs hold 64
   bits worth; floating point regs hold 32 bits worth (this includes the
   new fp regs as even the odd ones are included in the hard register
   count).  */

static unsigned int
sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (regno == SPARC_GSR_REG)
    return 1;
  if (TARGET_ARCH64)
    {
      if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
      return CEIL (GET_MODE_SIZE (mode), 4);
    }
  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
}

/* Implement TARGET_HARD_REGNO_MODE_OK.

   ??? Because of the funny way we pass parameters we should allow certain
   ??? types of float/complex values to be in integer registers during
   ??? RTL generation.  This only matters on arch32.  */

static bool
sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
}

/* Implement TARGET_MODES_TIEABLE_P.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

static bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9, so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode and the other is not, we have to mark them as not
     tieable, because only the lower 32 floating point registers are
     addressable 32 bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}

/* Implement TARGET_CSTORE_MODE.  */

static scalar_int_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
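
  /* In the %fsr, the accrued exception (aexc) field occupies bits 9:5
     and the trap enable mask (TEM) bits 27:23.  Clearing both gives a
     non-stop environment with no stale exception flags.  */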
  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Borrowed from the PA port.

   SImode loads to floating-point registers are not zero-extended.
   The definition for LOAD_EXTEND_OP specifies that integer loads
   narrower than BITS_PER_WORD will be zero-extended.  As a result,
   we inhibit changes from SImode unless they are to a mode that is
   identical in size.

   Likewise for SFmode, since word-mode paradoxical subregs are
   problematic on big-endian architectures.  */

static bool
sparc_can_change_mode_class (machine_mode from, machine_mode to,
			     reg_class_t rclass)
{
  if (TARGET_ARCH64
      && GET_MODE_SIZE (from) == 4
      && GET_MODE_SIZE (to) != 4)
    return !reg_classes_intersect_p (rclass, FP_REGS);
  return true;
}

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST)
    return MAX (align, FASTEST_ALIGNMENT);
  return align;
}

#include "gt-sparc.h"