/* Subroutines for insn-output.cc for SPARC.
   Copyright (C) 1987-2024 Free Software Foundation, Inc.
   Contributed by Michael Tiemann (tiemann@cygnus.com)
   64-bit SPARC-V9 support by Michael Tiemann, Jim Wilson, and Doug Evans,
   at Cygnus Support.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.

GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

#define IN_TARGET_CODE 1

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "target.h"
#include "rtl.h"
#include "tree.h"
#include "memmodel.h"
#include "gimple.h"
#include "df.h"
#include "tm_p.h"
#include "stringpool.h"
#include "attribs.h"
#include "expmed.h"
#include "optabs.h"
#include "regs.h"
#include "emit-rtl.h"
#include "recog.h"
#include "diagnostic-core.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "output.h"
#include "insn-attr.h"
#include "explow.h"
#include "expr.h"
#include "debug.h"
#include "cfgrtl.h"
#include "common/common-target.h"
#include "gimplify.h"
#include "langhooks.h"
#include "reload.h"
#include "tree-pass.h"
#include "context.h"
#include "builtins.h"
#include "tree-vector-builder.h"
#include "opts.h"
#include "dwarf2out.h"

/* This file should be included last.  */
#include "target-def.h"

/* Processor costs */

struct processor_costs {
  /* Integer load */
  const int int_load;

  /* Integer signed load */
  const int int_sload;

  /* Integer zeroed load */
  const int int_zload;

  /* Float load */
  const int float_load;

  /* fmov, fneg, fabs */
  const int float_move;

  /* fadd, fsub */
  const int float_plusminus;

  /* fcmp */
  const int float_cmp;

  /* fmov, fmovr */
  const int float_cmove;

  /* fmul */
  const int float_mul;

  /* fdivs */
  const int float_div_sf;

  /* fdivd */
  const int float_div_df;

  /* fsqrts */
  const int float_sqrt_sf;

  /* fsqrtd */
  const int float_sqrt_df;

  /* umul/smul */
  const int int_mul;

  /* mulX */
  const int int_mulX;

  /* Integer multiply cost for each bit set past the most
     significant 3, so the formula for multiply cost becomes:

	if (rs1 < 0)
	  highest_bit = highest_clear_bit(rs1);
	else
	  highest_bit = highest_set_bit(rs1);
	if (highest_bit < 3)
	  highest_bit = 3;
	cost = int_mul{,X} + ((highest_bit - 3) / int_mul_bit_factor);

     A value of zero indicates that the multiply cost is fixed,
     and not variable.  */
  const int int_mul_bit_factor;

  /* udiv/sdiv */
  const int int_div;

  /* divX */
  const int int_divX;

  /* movcc, movr */
  const int int_cmove;

  /* penalty for shifts, due to scheduling rules etc. */
  const int shift_penalty;

  /* cost of a (predictable) branch.  */
  const int branch_cost;
};
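
/* For illustration (hypothetical numbers, not taken from any table below):
   with int_mul = COSTS_N_INSNS (4) and int_mul_bit_factor = 2, a multiply
   whose rs1 operand has its highest set bit at position 9 is costed as
   COSTS_N_INSNS (4) + (9 - 3) / 2, i.e. 3 units above the base cost.  */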

static const
struct processor_costs cypress_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (2), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (5), /* fmov, fneg, fabs */
  COSTS_N_INSNS (5), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (7), /* fmul */
  COSTS_N_INSNS (37), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (63), /* fsqrts */
  COSTS_N_INSNS (63), /* fsqrtd */
  COSTS_N_INSNS (1), /* imul */
  COSTS_N_INSNS (1), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (1), /* idiv */
  COSTS_N_INSNS (1), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs supersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (0), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (3), /* fadd, fsub */
  COSTS_N_INSNS (3), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (3), /* fmul */
  COSTS_N_INSNS (6), /* fdivs */
  COSTS_N_INSNS (9), /* fdivd */
  COSTS_N_INSNS (12), /* fsqrts */
  COSTS_N_INSNS (12), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (4), /* idiv */
  COSTS_N_INSNS (4), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  1, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs hypersparc_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (8), /* fdivs */
  COSTS_N_INSNS (12), /* fdivd */
  COSTS_N_INSNS (17), /* fsqrts */
  COSTS_N_INSNS (17), /* fsqrtd */
  COSTS_N_INSNS (17), /* imul */
  COSTS_N_INSNS (17), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (17), /* idiv */
  COSTS_N_INSNS (17), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (15), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon3_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (14), /* fdivs */
  COSTS_N_INSNS (15), /* fdivd */
  COSTS_N_INSNS (22), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs leon5_costs = {
  COSTS_N_INSNS (1), /* int load */
  COSTS_N_INSNS (1), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (18), /* fdivd */
  COSTS_N_INSNS (25), /* fsqrts */
  COSTS_N_INSNS (26), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (35), /* idiv */
  COSTS_N_INSNS (35), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs sparclet_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (1), /* int zeroed load */
  COSTS_N_INSNS (1), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (1), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (1), /* fmov, fmovr */
  COSTS_N_INSNS (1), /* fmul */
  COSTS_N_INSNS (1), /* fdivs */
  COSTS_N_INSNS (1), /* fdivd */
  COSTS_N_INSNS (1), /* fsqrts */
  COSTS_N_INSNS (1), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (5), /* idiv */
  COSTS_N_INSNS (5), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  3 /* branch cost */
};

static const
struct processor_costs ultrasparc_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (2), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (1), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (1), /* fcmp */
  COSTS_N_INSNS (2), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (13), /* fdivs */
  COSTS_N_INSNS (23), /* fdivd */
  COSTS_N_INSNS (13), /* fsqrts */
  COSTS_N_INSNS (23), /* fsqrtd */
  COSTS_N_INSNS (4), /* imul */
  COSTS_N_INSNS (4), /* imulX */
  2, /* imul bit factor */
  COSTS_N_INSNS (37), /* idiv */
  COSTS_N_INSNS (68), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  2, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs ultrasparc3_costs = {
  COSTS_N_INSNS (2), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (2), /* float load */
  COSTS_N_INSNS (3), /* fmov, fneg, fabs */
  COSTS_N_INSNS (4), /* fadd, fsub */
  COSTS_N_INSNS (5), /* fcmp */
  COSTS_N_INSNS (3), /* fmov, fmovr */
  COSTS_N_INSNS (4), /* fmul */
  COSTS_N_INSNS (17), /* fdivs */
  COSTS_N_INSNS (20), /* fdivd */
  COSTS_N_INSNS (20), /* fsqrts */
  COSTS_N_INSNS (29), /* fsqrtd */
  COSTS_N_INSNS (6), /* imul */
  COSTS_N_INSNS (6), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (40), /* idiv */
  COSTS_N_INSNS (71), /* idivX */
  COSTS_N_INSNS (2), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (9), /* float load */
  COSTS_N_INSNS (8), /* fmov, fneg, fabs */
  COSTS_N_INSNS (8), /* fadd, fsub */
  COSTS_N_INSNS (26), /* fcmp */
  COSTS_N_INSNS (8), /* fmov, fmovr */
  COSTS_N_INSNS (29), /* fmul */
  COSTS_N_INSNS (54), /* fdivs */
  COSTS_N_INSNS (83), /* fdivd */
  COSTS_N_INSNS (100), /* fsqrts - not implemented in hardware */
  COSTS_N_INSNS (100), /* fsqrtd - not implemented in hardware */
  COSTS_N_INSNS (11), /* imul */
  COSTS_N_INSNS (11), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (72), /* idiv */
  COSTS_N_INSNS (72), /* idivX */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  4 /* branch cost */
};

static const
struct processor_costs niagara2_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (6), /* fmov, fneg, fabs */
  COSTS_N_INSNS (6), /* fadd, fsub */
  COSTS_N_INSNS (6), /* fcmp */
  COSTS_N_INSNS (6), /* fmov, fmovr */
  COSTS_N_INSNS (6), /* fmul */
  COSTS_N_INSNS (19), /* fdivs */
  COSTS_N_INSNS (33), /* fdivd */
  COSTS_N_INSNS (19), /* fsqrts */
  COSTS_N_INSNS (33), /* fsqrtd */
  COSTS_N_INSNS (5), /* imul */
  COSTS_N_INSNS (5), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (26), /* idiv, average of 12 - 41 cycle range */
  COSTS_N_INSNS (26), /* idivX, average of 12 - 41 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara3_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (23), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (23), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (9), /* imul */
  COSTS_N_INSNS (9), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (31), /* idiv, average of 17 - 45 cycle range */
  COSTS_N_INSNS (30), /* idivX, average of 16 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  5 /* branch cost */
};

static const
struct processor_costs niagara4_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (50), /* idiv, average of 41 - 60 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  2 /* branch cost */
};

static const
struct processor_costs niagara7_costs = {
  COSTS_N_INSNS (5), /* int load */
  COSTS_N_INSNS (5), /* int signed load */
  COSTS_N_INSNS (5), /* int zeroed load */
  COSTS_N_INSNS (5), /* float load */
  COSTS_N_INSNS (11), /* fmov, fneg, fabs */
  COSTS_N_INSNS (11), /* fadd, fsub */
  COSTS_N_INSNS (11), /* fcmp */
  COSTS_N_INSNS (11), /* fmov, fmovr */
  COSTS_N_INSNS (11), /* fmul */
  COSTS_N_INSNS (24), /* fdivs */
  COSTS_N_INSNS (37), /* fdivd */
  COSTS_N_INSNS (24), /* fsqrts */
  COSTS_N_INSNS (37), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (12), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (51), /* idiv, average of 42 - 61 cycle range */
  COSTS_N_INSNS (35), /* idivX, average of 26 - 44 cycle range */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const
struct processor_costs m8_costs = {
  COSTS_N_INSNS (3), /* int load */
  COSTS_N_INSNS (3), /* int signed load */
  COSTS_N_INSNS (3), /* int zeroed load */
  COSTS_N_INSNS (3), /* float load */
  COSTS_N_INSNS (9), /* fmov, fneg, fabs */
  COSTS_N_INSNS (9), /* fadd, fsub */
  COSTS_N_INSNS (9), /* fcmp */
  COSTS_N_INSNS (9), /* fmov, fmovr */
  COSTS_N_INSNS (9), /* fmul */
  COSTS_N_INSNS (26), /* fdivs */
  COSTS_N_INSNS (30), /* fdivd */
  COSTS_N_INSNS (33), /* fsqrts */
  COSTS_N_INSNS (41), /* fsqrtd */
  COSTS_N_INSNS (12), /* imul */
  COSTS_N_INSNS (10), /* imulX */
  0, /* imul bit factor */
  COSTS_N_INSNS (57), /* udiv/sdiv */
  COSTS_N_INSNS (30), /* udivx/sdivx */
  COSTS_N_INSNS (1), /* movcc/movr */
  0, /* shift penalty */
  1 /* branch cost */
};

static const struct processor_costs *sparc_costs = &cypress_costs;

#ifdef HAVE_AS_RELAX_OPTION
/* If 'as' and 'ld' are relaxing tail call insns into branch always, use
   "or %o7,%g0,X; call Y; or X,%g0,%o7" always, so that it can be optimized.
   With sethi/jmp, neither 'as' nor 'ld' has an easy way to find out whether
   something branches between the sethi and the jmp.  */
#define LEAF_SIBCALL_SLOT_RESERVED_P 1
#else
#define LEAF_SIBCALL_SLOT_RESERVED_P \
  ((TARGET_ARCH64 && !TARGET_CM_MEDLOW) || flag_pic)
#endif

/* Vector, indexed by hard register number, which contains 1
   for a register that is allowable in a candidate for leaf
   function treatment.  */
char sparc_leaf_regs[] =
{ 1, 1, 1, 1, 1, 1, 1, 1,
  0, 0, 0, 0, 0, 0, 1, 0,
  0, 0, 0, 0, 0, 0, 0, 0,
  1, 1, 1, 1, 1, 1, 0, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1, 1,
  1, 1, 1, 1, 1, 1, 1};

struct GTY(()) machine_function
{
  /* Size of the frame of the function.  */
  HOST_WIDE_INT frame_size;

  /* Size of the frame of the function minus the register window save area
     and the outgoing argument area.  */
  HOST_WIDE_INT apparent_frame_size;

  /* Register we pretend the frame pointer is allocated to.  Normally, this
     is %fp, but if we are in a leaf procedure, this is (%sp + offset).  We
     record "offset" separately as it may be too big for (reg + disp).  */
  rtx frame_base_reg;
  HOST_WIDE_INT frame_base_offset;

  /* Number of global or FP registers to be saved (as 4-byte quantities).  */
  int n_global_fp_regs;

  /* True if the current function is leaf and uses only leaf regs,
     so that the SPARC leaf function optimization can be applied.
     Private version of crtl->uses_only_leaf_regs, see
     sparc_expand_prologue for the rationale.  */
  int leaf_function_p;

  /* True if the prologue saves local or in registers.  */
  bool save_local_in_regs_p;

  /* True if the data calculated by sparc_expand_prologue are valid.  */
  bool prologue_data_valid_p;
};

#define sparc_frame_size cfun->machine->frame_size
#define sparc_apparent_frame_size cfun->machine->apparent_frame_size
#define sparc_frame_base_reg cfun->machine->frame_base_reg
#define sparc_frame_base_offset cfun->machine->frame_base_offset
#define sparc_n_global_fp_regs cfun->machine->n_global_fp_regs
#define sparc_leaf_function_p cfun->machine->leaf_function_p
#define sparc_save_local_in_regs_p cfun->machine->save_local_in_regs_p
#define sparc_prologue_data_valid_p cfun->machine->prologue_data_valid_p

/* 1 if the next opcode is to be specially indented.  */
int sparc_indent_opcode = 0;

static void sparc_option_override (void);
static void sparc_init_modes (void);
static int function_arg_slotno (const CUMULATIVE_ARGS *, machine_mode,
				const_tree, bool, bool, int *, int *);

static int supersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int hypersparc_adjust_cost (rtx_insn *, int, rtx_insn *, int);
static int leon5_adjust_cost (rtx_insn *, int, rtx_insn *, int);

static void sparc_emit_set_const32 (rtx, rtx);
static void sparc_emit_set_const64 (rtx, rtx);
static void sparc_output_addr_vec (rtx);
static void sparc_output_addr_diff_vec (rtx);
static void sparc_output_deferred_case_vectors (void);
static bool sparc_legitimate_address_p (machine_mode, rtx, bool,
					code_helper = ERROR_MARK);
static bool sparc_legitimate_constant_p (machine_mode, rtx);
static rtx sparc_builtin_saveregs (void);
static int epilogue_renumber (rtx *, int);
static bool sparc_assemble_integer (rtx, unsigned int, int);
static int set_extends (rtx_insn *);
static void sparc_asm_function_prologue (FILE *);
static void sparc_asm_function_epilogue (FILE *);
#ifdef TARGET_SOLARIS
static void sparc_solaris_elf_asm_named_section (const char *, unsigned int,
						 tree) ATTRIBUTE_UNUSED;
#endif
static int sparc_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
static int sparc_issue_rate (void);
static void sparc_sched_init (FILE *, int, int);
static int sparc_use_sched_lookahead (void);

static void emit_soft_tfmode_libcall (const char *, int, rtx *);
static void emit_soft_tfmode_binop (enum rtx_code, rtx *);
static void emit_soft_tfmode_unop (enum rtx_code, rtx *);
static void emit_soft_tfmode_cvt (enum rtx_code, rtx *);
static void emit_hard_tfmode_operation (enum rtx_code, rtx *);

static bool sparc_function_ok_for_sibcall (tree, tree);
static void sparc_init_libfuncs (void);
static void sparc_init_builtins (void);
static void sparc_fpu_init_builtins (void);
static void sparc_vis_init_builtins (void);
static tree sparc_builtin_decl (unsigned, bool);
static rtx sparc_expand_builtin (tree, rtx, rtx, machine_mode, int);
static tree sparc_fold_builtin (tree, int, tree *, bool);
static void sparc_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				   HOST_WIDE_INT, tree);
static bool sparc_can_output_mi_thunk (const_tree, HOST_WIDE_INT,
				       HOST_WIDE_INT, const_tree);
static struct machine_function * sparc_init_machine_status (void);
static bool sparc_cannot_force_const_mem (machine_mode, rtx);
static rtx sparc_tls_get_addr (void);
static rtx sparc_tls_got (void);
static int sparc_register_move_cost (machine_mode,
				     reg_class_t, reg_class_t);
static bool sparc_rtx_costs (rtx, machine_mode, int, int, int *, bool);
static machine_mode sparc_promote_function_mode (const_tree, machine_mode,
						 int *, const_tree, int);
static bool sparc_strict_argument_naming (cumulative_args_t);
static void sparc_va_start (tree, rtx);
static tree sparc_gimplify_va_arg (tree, tree, gimple_seq *, gimple_seq *);
static bool sparc_vector_mode_supported_p (machine_mode);
static bool sparc_tls_referenced_p (rtx);
static rtx sparc_legitimize_tls_address (rtx);
static rtx sparc_legitimize_pic_address (rtx, rtx);
static rtx sparc_legitimize_address (rtx, rtx, machine_mode);
static rtx sparc_delegitimize_address (rtx);
static bool sparc_mode_dependent_address_p (const_rtx, addr_space_t);
static bool sparc_pass_by_reference (cumulative_args_t,
				     const function_arg_info &);
static void sparc_function_arg_advance (cumulative_args_t,
					const function_arg_info &);
static rtx sparc_function_arg (cumulative_args_t, const function_arg_info &);
static rtx sparc_function_incoming_arg (cumulative_args_t,
					const function_arg_info &);
static pad_direction sparc_function_arg_padding (machine_mode, const_tree);
static unsigned int sparc_function_arg_boundary (machine_mode,
						 const_tree);
static int sparc_arg_partial_bytes (cumulative_args_t,
				    const function_arg_info &);
static bool sparc_return_in_memory (const_tree, const_tree);
static rtx sparc_struct_value_rtx (tree, int);
static rtx sparc_function_value (const_tree, const_tree, bool);
static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
static bool sparc_output_cfi_directive (FILE *, dw_cfi_ref);
static bool sparc_dw_cfi_oprnd1_desc (dwarf_call_frame_info,
				      dw_cfi_oprnd_type &);
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
static void sparc_conditional_register_usage (void);
static bool sparc_use_pseudo_pic_reg (void);
static void sparc_init_pic_reg (void);
#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
static const char *sparc_mangle_type (const_tree);
#endif
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
static reg_class_t sparc_secondary_reload (bool, rtx, reg_class_t,
					   machine_mode,
					   secondary_reload_info *);
static bool sparc_secondary_memory_needed (machine_mode, reg_class_t,
					   reg_class_t);
static machine_mode sparc_secondary_memory_needed_mode (machine_mode);
static scalar_int_mode sparc_cstore_mode (enum insn_code icode);
static void sparc_atomic_assign_expand_fenv (tree *, tree *, tree *);
static bool sparc_fixed_condition_code_regs (unsigned int *, unsigned int *);
static unsigned int sparc_min_arithmetic_precision (void);
static unsigned int sparc_hard_regno_nregs (unsigned int, machine_mode);
static bool sparc_hard_regno_mode_ok (unsigned int, machine_mode);
static bool sparc_modes_tieable_p (machine_mode, machine_mode);
static bool sparc_can_change_mode_class (machine_mode, machine_mode,
					 reg_class_t);
static HOST_WIDE_INT sparc_constant_alignment (const_tree, HOST_WIDE_INT);
static bool sparc_vectorize_vec_perm_const (machine_mode, machine_mode,
					    rtx, rtx, rtx,
					    const vec_perm_indices &);
static bool sparc_can_follow_jump (const rtx_insn *, const rtx_insn *);
static HARD_REG_SET sparc_zero_call_used_regs (HARD_REG_SET);
static machine_mode sparc_c_mode_for_floating_type (enum tree_index);

#ifdef SUBTARGET_ATTRIBUTE_TABLE
/* Table of valid machine attributes.  */
TARGET_GNU_ATTRIBUTES (sparc_attribute_table,
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
       do_diagnostic, handler, exclude } */
  SUBTARGET_ATTRIBUTE_TABLE
});
#endif

char sparc_hard_reg_printed[8];

/* Initialize the GCC target structure.  */

/* The default is to use .half rather than .short for aligned HI objects.  */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"

#undef TARGET_ASM_UNALIGNED_HI_OP
#define TARGET_ASM_UNALIGNED_HI_OP "\t.uahalf\t"
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.uaword\t"
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP "\t.uaxword\t"

/* The target hook has to handle DI-mode values.  */
#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER sparc_assemble_integer

#undef TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE sparc_asm_function_prologue
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sparc_asm_function_epilogue

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST sparc_adjust_cost
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE sparc_issue_rate
#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT sparc_sched_init
#undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
#define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD sparc_use_sched_lookahead

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL sparc_function_ok_for_sibcall

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS sparc_init_libfuncs

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS sparc_legitimize_address
#undef TARGET_DELEGITIMIZE_ADDRESS
#define TARGET_DELEGITIMIZE_ADDRESS sparc_delegitimize_address
#undef TARGET_MODE_DEPENDENT_ADDRESS_P
#define TARGET_MODE_DEPENDENT_ADDRESS_P sparc_mode_dependent_address_p

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS sparc_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL sparc_builtin_decl
#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN sparc_expand_builtin
#undef TARGET_FOLD_BUILTIN
#define TARGET_FOLD_BUILTIN sparc_fold_builtin

#if TARGET_TLS
#undef TARGET_HAVE_TLS
#define TARGET_HAVE_TLS true
#endif

#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM sparc_cannot_force_const_mem

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK sparc_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK sparc_can_output_mi_thunk

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS sparc_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST sparc_register_move_cost

#undef TARGET_PROMOTE_FUNCTION_MODE
#define TARGET_PROMOTE_FUNCTION_MODE sparc_promote_function_mode
#undef TARGET_STRICT_ARGUMENT_NAMING
#define TARGET_STRICT_ARGUMENT_NAMING sparc_strict_argument_naming

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE sparc_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES sparc_arg_partial_bytes
#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE sparc_function_arg_advance
#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG sparc_function_arg
#undef TARGET_FUNCTION_INCOMING_ARG
#define TARGET_FUNCTION_INCOMING_ARG sparc_function_incoming_arg
#undef TARGET_FUNCTION_ARG_PADDING
#define TARGET_FUNCTION_ARG_PADDING sparc_function_arg_padding
#undef TARGET_FUNCTION_ARG_BOUNDARY
#define TARGET_FUNCTION_ARG_BOUNDARY sparc_function_arg_boundary

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY sparc_return_in_memory
#undef TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX sparc_struct_value_rtx
#undef TARGET_FUNCTION_VALUE
#define TARGET_FUNCTION_VALUE sparc_function_value
#undef TARGET_LIBCALL_VALUE
#define TARGET_LIBCALL_VALUE sparc_libcall_value
#undef TARGET_FUNCTION_VALUE_REGNO_P
#define TARGET_FUNCTION_VALUE_REGNO_P sparc_function_value_regno_p

#undef TARGET_EXPAND_BUILTIN_SAVEREGS
#define TARGET_EXPAND_BUILTIN_SAVEREGS sparc_builtin_saveregs

#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET sparc_asan_shadow_offset

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START sparc_va_start
#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR sparc_gimplify_va_arg

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P sparc_vector_mode_supported_p

#undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
#define TARGET_VECTORIZE_PREFERRED_SIMD_MODE sparc_preferred_simd_mode

#ifdef SUBTARGET_INSERT_ATTRIBUTES
#undef TARGET_INSERT_ATTRIBUTES
#define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
#endif

#ifdef SUBTARGET_ATTRIBUTE_TABLE
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE sparc_attribute_table
#endif

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE sparc_option_override

#ifdef TARGET_THREAD_SSP_OFFSET
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
#endif

#if TARGET_GNU_TLS && defined(HAVE_AS_SPARC_UA_PCREL)
#undef TARGET_ASM_OUTPUT_DWARF_DTPREL
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif

#undef TARGET_OUTPUT_CFI_DIRECTIVE
#define TARGET_OUTPUT_CFI_DIRECTIVE sparc_output_cfi_directive

#undef TARGET_DW_CFI_OPRND1_DESC
#define TARGET_DW_CFI_OPRND1_DESC sparc_dw_cfi_oprnd1_desc

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end

#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED sparc_frame_pointer_required

#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE sparc_can_eliminate

#undef TARGET_PREFERRED_RELOAD_CLASS
#define TARGET_PREFERRED_RELOAD_CLASS sparc_preferred_reload_class

#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD sparc_secondary_reload
#undef TARGET_SECONDARY_MEMORY_NEEDED
#define TARGET_SECONDARY_MEMORY_NEEDED sparc_secondary_memory_needed
#undef TARGET_SECONDARY_MEMORY_NEEDED_MODE
#define TARGET_SECONDARY_MEMORY_NEEDED_MODE sparc_secondary_memory_needed_mode

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE sparc_conditional_register_usage

#undef TARGET_INIT_PIC_REG
#define TARGET_INIT_PIC_REG sparc_init_pic_reg

#undef TARGET_USE_PSEUDO_PIC_REG
#define TARGET_USE_PSEUDO_PIC_REG sparc_use_pseudo_pic_reg

#ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P sparc_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT sparc_trampoline_init

#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sparc_print_operand_punct_valid_p
#undef TARGET_PRINT_OPERAND
#define TARGET_PRINT_OPERAND sparc_print_operand
#undef TARGET_PRINT_OPERAND_ADDRESS
#define TARGET_PRINT_OPERAND_ADDRESS sparc_print_operand_address

/* The value stored by LDSTUB.  */
#undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
#define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0xff

#undef TARGET_CSTORE_MODE
#define TARGET_CSTORE_MODE sparc_cstore_mode

#undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
#define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sparc_atomic_assign_expand_fenv

#undef TARGET_FIXED_CONDITION_CODE_REGS
#define TARGET_FIXED_CONDITION_CODE_REGS sparc_fixed_condition_code_regs

#undef TARGET_MIN_ARITHMETIC_PRECISION
#define TARGET_MIN_ARITHMETIC_PRECISION sparc_min_arithmetic_precision

#undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
#define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1

#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS sparc_hard_regno_nregs
#undef TARGET_HARD_REGNO_MODE_OK
#define TARGET_HARD_REGNO_MODE_OK sparc_hard_regno_mode_ok

#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P sparc_modes_tieable_p

#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS sparc_can_change_mode_class

#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT sparc_constant_alignment

#undef TARGET_VECTORIZE_VEC_PERM_CONST
#define TARGET_VECTORIZE_VEC_PERM_CONST sparc_vectorize_vec_perm_const

#undef TARGET_CAN_FOLLOW_JUMP
#define TARGET_CAN_FOLLOW_JUMP sparc_can_follow_jump

#undef TARGET_ZERO_CALL_USED_REGS
#define TARGET_ZERO_CALL_USED_REGS sparc_zero_call_used_regs

#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE sparc_c_mode_for_floating_type

struct gcc_target targetm = TARGET_INITIALIZER;

/* Return the memory reference contained in X if any, zero otherwise.  */

static rtx
mem_ref (rtx x)
{
  if (GET_CODE (x) == SIGN_EXTEND || GET_CODE (x) == ZERO_EXTEND)
    x = XEXP (x, 0);

  if (MEM_P (x))
    return x;

  return NULL_RTX;
}
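
/* For instance (hypothetical RTL), mem_ref applied to
   (zero_extend:DI (mem:SI (reg:DI %o0))) returns the inner MEM,
   while mem_ref applied to a bare REG returns NULL_RTX.  */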

/* True if any of INSN's source register(s) is REG.  */

static bool
insn_uses_reg_p (rtx_insn *insn, unsigned int reg)
{
  extract_insn (insn);
  return ((REG_P (recog_data.operand[1])
	   && REGNO (recog_data.operand[1]) == reg)
	  || (recog_data.n_operands == 3
	      && REG_P (recog_data.operand[2])
	      && REGNO (recog_data.operand[2]) == reg));
}

/* True if INSN is a floating-point division or square-root.  */

static bool
div_sqrt_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a floating-point instruction.  */

static bool
fpop_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_FPMOVE:
    case TYPE_FPCMOVE:
    case TYPE_FP:
    case TYPE_FPCMP:
    case TYPE_FPMUL:
    case TYPE_FPDIVS:
    case TYPE_FPSQRTS:
    case TYPE_FPDIVD:
    case TYPE_FPSQRTD:
      return true;
    default:
      return false;
    }
}

/* True if INSN is an atomic instruction.  */

static bool
atomic_insn_for_leon3_p (rtx_insn *insn)
{
  switch (INSN_CODE (insn))
    {
    case CODE_FOR_membar_storeload:
    case CODE_FOR_swapsi:
    case CODE_FOR_ldstub:
    case CODE_FOR_atomic_compare_and_swap_leon3_1:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a store instruction.  */

static bool
store_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_STORE:
    case TYPE_FPSTORE:
      return true;
    default:
      return false;
    }
}

/* True if INSN is a load instruction.  */

static bool
load_insn_p (rtx_insn *insn)
{
  if (GET_CODE (PATTERN (insn)) != SET)
    return false;

  switch (get_attr_type (insn))
    {
    case TYPE_LOAD:
    case TYPE_SLOAD:
    case TYPE_FPLOAD:
      return true;
    default:
      return false;
    }
}

/* We use a machine specific pass to enable workarounds for errata.

   We need to have the (essentially) final form of the insn stream in order
   to properly detect the various hazards.  Therefore, this machine specific
   pass runs as late as possible.  */

/* True if INSN is a md pattern or asm statement.  */
#define USEFUL_INSN_P(INSN) \
  (NONDEBUG_INSN_P (INSN) \
   && GET_CODE (PATTERN (INSN)) != USE \
   && GET_CODE (PATTERN (INSN)) != CLOBBER)

rtx_insn *
next_active_non_empty_insn (rtx_insn *insn)
{
  insn = next_active_insn (insn);

  while (insn
	 && (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
	     || GET_CODE (PATTERN (insn)) == ASM_INPUT
	     || get_attr_length (insn) == 0
	     || (USEFUL_INSN_P (insn)
		 && (asm_noperands (PATTERN (insn)) >= 0)
		 && !strcmp (decode_asm_operands (PATTERN (insn),
						  NULL, NULL, NULL,
						  NULL, NULL), ""))))
    insn = next_active_insn (insn);

  return insn;
}

static unsigned int
sparc_do_work_around_errata (void)
{
  rtx_insn *insn, *next;
  bool find_first_useful = true;

  /* Force all instructions to be split into their final form.  */
  split_all_insns_noflow ();

  /* Now look for specific patterns in the insn stream.  */
  for (insn = get_insns (); insn; insn = next)
    {
      bool insert_nop = false;
      rtx set;
      rtx_insn *jump;
      rtx_sequence *seq;

      /* Look into the instruction in a delay slot.  */
      if (NONJUMP_INSN_P (insn)
	  && (seq = dyn_cast <rtx_sequence *> (PATTERN (insn))))
	{
	  jump = seq->insn (0);
	  insn = seq->insn (1);
	}
      else if (JUMP_P (insn))
	jump = insn;
      else
	jump = NULL;

      /* Do not begin function with atomic instruction.  */
      if (sparc_fix_ut700
	  && find_first_useful
	  && USEFUL_INSN_P (insn))
	{
	  find_first_useful = false;
	  if (atomic_insn_for_leon3_p (insn))
	    emit_insn_before (gen_nop (), insn);
	}

      /* Place a NOP at the branch target of an integer branch if it is a
	 floating-point operation or a floating-point branch.  */
      if (sparc_fix_gr712rc
	  && jump
	  && jump_to_label_p (jump)
	  && get_attr_branch_type (jump) == BRANCH_TYPE_ICC)
	{
	  rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	  if (target
	      && (fpop_insn_p (target)
		  || (JUMP_P (target)
		      && get_attr_branch_type (target) == BRANCH_TYPE_FCC)))
	    emit_insn_before (gen_nop (), target);
	}

      /* Insert a NOP between load instruction and atomic instruction.  Insert
	 a NOP at branch target if there is a load in delay slot and an atomic
	 instruction at branch target.  */
      if (sparc_fix_ut700
	  && NONJUMP_INSN_P (insn)
	  && load_insn_p (insn))
	{
	  if (jump && jump_to_label_p (jump))
	    {
	      rtx_insn *target = next_active_insn (JUMP_LABEL_AS_INSN (jump));
	      if (target && atomic_insn_for_leon3_p (target))
		emit_insn_before (gen_nop (), target);
	    }

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  if (atomic_insn_for_leon3_p (next))
	    insert_nop = true;
	}

      /* Look for a sequence that starts with a fdiv or fsqrt instruction and
	 ends with another fdiv or fsqrt instruction with no dependencies on
	 the former, along with an appropriate pattern in between.  */
      if (sparc_fix_lost_divsqrt
	  && NONJUMP_INSN_P (insn)
	  && div_sqrt_insn_p (insn))
	{
	  int i;
	  int fp_found = 0;
	  rtx_insn *after;

	  const unsigned int dest_reg = REGNO (SET_DEST (single_set (insn)));

	  next = next_active_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 4; i++)
	    {
	      /* Count floating-point operations.  */
	      if (i != 3 && fpop_insn_p (after))
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (insn_uses_reg_p (after, dest_reg))
		    break;
		  fp_found++;
		}

	      /* Count floating-point loads.  */
	      if (i != 3
		  && (set = single_set (after)) != NULL_RTX
		  && REG_P (SET_DEST (set))
		  && REGNO (SET_DEST (set)) > 31)
		{
		  /* If the insn uses the destination register of
		     the div/sqrt, then it cannot be problematic.  */
		  if (REGNO (SET_DEST (set)) == dest_reg)
		    break;
		  fp_found++;
		}

	      /* Check if this is a problematic sequence.  */
	      if (i > 1
		  && fp_found >= 2
		  && div_sqrt_insn_p (after))
		{
		  /* If this is the short version of the problematic
		     sequence we add two NOPs in a row to also prevent
		     the long version.  */
		  if (i == 2)
		    emit_insn_before (gen_nop (), next);
		  insert_nop = true;
		  break;
		}

	      /* No need to scan past a second div/sqrt.  */
	      if (div_sqrt_insn_p (after))
		break;

	      /* Insert NOP before branch.  */
	      if (i < 3
		  && (!NONJUMP_INSN_P (after)
		      || GET_CODE (PATTERN (after)) == SEQUENCE))
		{
		  insert_nop = true;
		  break;
		}

	      after = next_active_insn (after);
	      if (!after)
		break;
	    }
	}

      /* Look for either of these two sequences:

	 Sequence A:
	 1. store of word size or less (e.g. st / stb / sth / stf)
	 2. any single instruction that is not a load or store
	 3. any store instruction (e.g. st / stb / sth / stf / std / stdf)

	 Sequence B:
	 1. store of double word size (e.g. std / stdf)
	 2. any store instruction (e.g. st / stb / sth / stf / std / stdf)  */
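      /* An illustrative (hypothetical, not from the errata documents)
	 instance of each:

	   Sequence A:
	     st   %g1, [%o0]      ! 1. single-word store
	     add  %o2, 1, %o2     ! 2. non-memory instruction
	     st   %o3, [%o1]      ! 3. store: a NOP is inserted before it

	   Sequence B:
	     std  %f0, [%o0]      ! 1. double-word store
	     st   %g1, [%o1]      ! 2. store: a NOP is inserted before it  */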
      if (sparc_fix_b2bst
	  && NONJUMP_INSN_P (insn)
	  && (set = single_set (insn)) != NULL_RTX
	  && store_insn_p (insn))
	{
	  /* Sequence B begins with a double-word store.  */
	  bool seq_b = GET_MODE_SIZE (GET_MODE (SET_DEST (set))) == 8;
	  rtx_insn *after;
	  int i;

	  next = next_active_non_empty_insn (insn);
	  if (!next)
	    break;

	  for (after = next, i = 0; i < 2; i++)
	    {
	      /* If the insn is a branch, then it cannot be problematic.  */
	      if (!NONJUMP_INSN_P (after)
		  || GET_CODE (PATTERN (after)) == SEQUENCE)
		break;

	      /* Sequence B is only two instructions long.  */
	      if (seq_b)
		{
		  /* Add NOP if followed by a store.  */
		  if (store_insn_p (after))
		    insert_nop = true;

		  /* Otherwise it is ok.  */
		  break;
		}

	      /* If the second instruction is a load or a store,
		 then the sequence cannot be problematic.  */
	      if (i == 0)
		{
		  if ((set = single_set (after)) != NULL_RTX
		      && (MEM_P (SET_DEST (set)) || mem_ref (SET_SRC (set))))
		    break;

		  after = next_active_non_empty_insn (after);
		  if (!after)
		    break;
		}

	      /* Add NOP if third instruction is a store.  */
	      if (i == 1
		  && store_insn_p (after))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load into an odd-numbered FP register.  */
      else if (sparc_fix_at697f
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && mem_ref (SET_SRC (set))
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31
	       && REGNO (SET_DEST (set)) % 2 != 0)
	{
	  /* The wrong dependency is on the enclosing double register.  */
	  const unsigned int x = REGNO (SET_DEST (set)) - 1;
	  unsigned int src1, src2, dest;
	  int code;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  extract_insn (next);
	  code = INSN_CODE (next);

	  switch (code)
	    {
	    case CODE_FOR_adddf3:
	    case CODE_FOR_subdf3:
	    case CODE_FOR_muldf3:
	    case CODE_FOR_divdf3:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      src2 = REGNO (recog_data.operand[2]);
	      if (src1 != src2)
		{
		  /* Case [1-4]:
		       ld [address], %fx+1
		       FPOPd %f{x,y}, %f{y,x}, %f{x,y}  */
		  if ((src1 == x || src2 == x)
		      && (dest == src1 || dest == src2))
		    insert_nop = true;
		}
	      else
		{
		  /* Case 5:
		       ld [address], %fx+1
		       FPOPd %fx, %fx, %fx  */
		  if (src1 == x
		      && dest == src1
		      && (code == CODE_FOR_adddf3 || code == CODE_FOR_muldf3))
		    insert_nop = true;
		}
	      break;

	    case CODE_FOR_sqrtdf2:
	      dest = REGNO (recog_data.operand[0]);
	      src1 = REGNO (recog_data.operand[1]);
	      /* Case 6:
		   ld [address], %fx+1
		   fsqrtd %fx, %fx  */
	      if (src1 == x && dest == src1)
		insert_nop = true;
	      break;

	    default:
	      break;
	    }
	}

      /* Look for a single-word load into an integer register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) <= 4
	       && (mem_ref (SET_SRC (set)) != NULL_RTX
		   || INSN_CODE (insn) == CODE_FOR_movsi_pic_gotdata_op)
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) < 32)
	{
	  /* There is no problem if the second memory access has a data
	     dependency on the first single-cycle load.  */
	  rtx x = SET_DEST (set);

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second memory access to/from an integer register.  */
	  if ((set = single_set (next)) != NULL_RTX)
	    {
	      rtx src = SET_SRC (set);
	      rtx dest = SET_DEST (set);
	      rtx mem;

	      /* LDD is affected.  */
	      if ((mem = mem_ref (src)) != NULL_RTX
		  && REG_P (dest)
		  && REGNO (dest) < 32
		  && !reg_mentioned_p (x, XEXP (mem, 0)))
		insert_nop = true;

	      /* STD is *not* affected.  */
	      else if (MEM_P (dest)
		       && GET_MODE_SIZE (GET_MODE (dest)) <= 4
		       && (src == CONST0_RTX (GET_MODE (dest))
			   || (REG_P (src)
			       && REGNO (src) < 32
			       && REGNO (src) != REGNO (x)))
		       && !reg_mentioned_p (x, XEXP (dest, 0)))
		insert_nop = true;

	      /* GOT accesses use LD.  */
	      else if (INSN_CODE (next) == CODE_FOR_movsi_pic_gotdata_op
		       && !reg_mentioned_p (x, XEXP (XEXP (src, 0), 1)))
		insert_nop = true;
	    }
	}

      /* Look for a single-word load/operation into an FP register.  */
      else if (sparc_fix_ut699
	       && NONJUMP_INSN_P (insn)
	       && (set = single_set (insn)) != NULL_RTX
	       && GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
	       && REG_P (SET_DEST (set))
	       && REGNO (SET_DEST (set)) > 31)
	{
	  /* Number of instructions in the problematic window.  */
	  const int n_insns = 4;
	  /* The problematic combination is with the sibling FP register.  */
	  const unsigned int x = REGNO (SET_DEST (set));
	  const unsigned int y = x ^ 1;
	  rtx_insn *after;
	  int i;

	  next = next_active_insn (insn);
	  if (!next)
	    break;
	  /* If the insn is a branch, then it cannot be problematic.  */
	  if (!NONJUMP_INSN_P (next) || GET_CODE (PATTERN (next)) == SEQUENCE)
	    continue;

	  /* Look for a second load/operation into the sibling FP register.  */
	  if (!((set = single_set (next)) != NULL_RTX
		&& GET_MODE_SIZE (GET_MODE (SET_SRC (set))) == 4
		&& REG_P (SET_DEST (set))
		&& REGNO (SET_DEST (set)) == y))
	    continue;

	  /* Look for a (possible) store from the FP register in the next N
	     instructions, but bail out if it is again modified or if there
	     is a store from the sibling FP register before this store.  */
	  for (after = next, i = 0; i < n_insns; i++)
	    {
	      bool branch_p;

	      after = next_active_insn (after);
	      if (!after)
		break;

	      /* This is a branch with an empty delay slot.  */
	      if (!NONJUMP_INSN_P (after))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = NULL;
		}
	      /* This is a branch with a filled delay slot.  */
	      else if (rtx_sequence *seq =
			 dyn_cast <rtx_sequence *> (PATTERN (after)))
		{
		  if (++i == n_insns)
		    break;
		  branch_p = true;
		  after = seq->insn (1);
		}
	      /* This is a regular instruction.  */
	      else
		branch_p = false;

	      if (after && (set = single_set (after)) != NULL_RTX)
		{
		  const rtx src = SET_SRC (set);
		  const rtx dest = SET_DEST (set);
		  const unsigned int size = GET_MODE_SIZE (GET_MODE (dest));

		  /* If the FP register is again modified before the store,
		     then the store isn't affected.  */
		  if (REG_P (dest)
		      && (REGNO (dest) == x
			  || (REGNO (dest) == y && size == 8)))
		    break;

		  if (MEM_P (dest) && REG_P (src))
		    {
		      /* If there is a store from the sibling FP register
			 before the store, then the store is not affected.  */
		      if (REGNO (src) == y || (REGNO (src) == x && size == 8))
			break;

		      /* Otherwise, the store is affected.  */
		      if (REGNO (src) == x && size == 4)
			{
			  insert_nop = true;
			  break;
			}
		    }
		}

	      /* If we have a branch in the first M instructions, then we
		 cannot see the (M+2)th instruction so we play safe.  */
	      if (branch_p && i <= (n_insns - 2))
		{
		  insert_nop = true;
		  break;
		}
	    }
	}

      else
	next = NEXT_INSN (insn);

      if (insert_nop)
	emit_insn_before (gen_nop (), next);
    }

  return 0;
}

namespace {

const pass_data pass_data_work_around_errata =
{
  RTL_PASS, /* type */
  "errata", /* name */
  OPTGROUP_NONE, /* optinfo_flags */
  TV_MACH_DEP, /* tv_id */
  0, /* properties_required */
  0, /* properties_provided */
  0, /* properties_destroyed */
  0, /* todo_flags_start */
  0, /* todo_flags_finish */
};

class pass_work_around_errata : public rtl_opt_pass
{
public:
  pass_work_around_errata(gcc::context *ctxt)
    : rtl_opt_pass(pass_data_work_around_errata, ctxt)
    {}

  /* opt_pass methods: */
  virtual bool gate (function *)
    {
      return sparc_fix_at697f
	|| sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc
	|| sparc_fix_b2bst || sparc_fix_lost_divsqrt;
    }

  virtual unsigned int execute (function *)
    {
      return sparc_do_work_around_errata ();
    }
}; // class pass_work_around_errata

} // anon namespace

rtl_opt_pass *
make_pass_work_around_errata (gcc::context *ctxt)
{
  return new pass_work_around_errata (ctxt);
}

/* Helpers for TARGET_DEBUG_OPTIONS.  */
static void
dump_target_flag_bits (const int flags)
{
  if (flags & MASK_64BIT)
    fprintf (stderr, "64BIT ");
  if (flags & MASK_APP_REGS)
    fprintf (stderr, "APP_REGS ");
  if (flags & MASK_FASTER_STRUCTS)
    fprintf (stderr, "FASTER_STRUCTS ");
  if (flags & MASK_FLAT)
    fprintf (stderr, "FLAT ");
  if (flags & MASK_FMAF)
    fprintf (stderr, "FMAF ");
  if (flags & MASK_FSMULD)
    fprintf (stderr, "FSMULD ");
  if (flags & MASK_FPU)
    fprintf (stderr, "FPU ");
  if (flags & MASK_HARD_QUAD)
    fprintf (stderr, "HARD_QUAD ");
  if (flags & MASK_POPC)
    fprintf (stderr, "POPC ");
  if (flags & MASK_PTR64)
    fprintf (stderr, "PTR64 ");
  if (flags & MASK_STACK_BIAS)
    fprintf (stderr, "STACK_BIAS ");
  if (flags & MASK_UNALIGNED_DOUBLES)
    fprintf (stderr, "UNALIGNED_DOUBLES ");
  if (flags & MASK_V8PLUS)
    fprintf (stderr, "V8PLUS ");
  if (flags & MASK_VIS)
    fprintf (stderr, "VIS ");
  if (flags & MASK_VIS2)
    fprintf (stderr, "VIS2 ");
  if (flags & MASK_VIS3)
    fprintf (stderr, "VIS3 ");
  if (flags & MASK_VIS4)
    fprintf (stderr, "VIS4 ");
  if (flags & MASK_VIS4B)
    fprintf (stderr, "VIS4B ");
  if (flags & MASK_CBCOND)
    fprintf (stderr, "CBCOND ");
  if (flags & MASK_DEPRECATED_V8_INSNS)
    fprintf (stderr, "DEPRECATED_V8_INSNS ");
  if (flags & MASK_LEON)
    fprintf (stderr, "LEON ");
  if (flags & MASK_LEON3)
    fprintf (stderr, "LEON3 ");
  if (flags & MASK_SPARCLET)
    fprintf (stderr, "SPARCLET ");
  if (flags & MASK_SPARCLITE)
    fprintf (stderr, "SPARCLITE ");
  if (flags & MASK_V8)
    fprintf (stderr, "V8 ");
  if (flags & MASK_V9)
    fprintf (stderr, "V9 ");
}

static void
dump_target_flags (const char *prefix, const int flags)
{
  fprintf (stderr, "%s: (%08x) [ ", prefix, flags);
  dump_target_flag_bits (flags);
  fprintf (stderr, "]\n");
}

/* Validate and override various options, and do some machine dependent
   initialization.  */

static void
sparc_option_override (void)
{
  /* Map TARGET_CPU_DEFAULT to value for -m{cpu,tune}=.  */
  static struct cpu_default {
    const int cpu;
    const enum sparc_processor_type processor;
  } const cpu_default[] = {
    /* There must be one entry here for each TARGET_CPU value.  */
    { TARGET_CPU_sparc, PROCESSOR_CYPRESS },
    { TARGET_CPU_v8, PROCESSOR_V8 },
    { TARGET_CPU_supersparc, PROCESSOR_SUPERSPARC },
    { TARGET_CPU_hypersparc, PROCESSOR_HYPERSPARC },
    { TARGET_CPU_leon, PROCESSOR_LEON },
    { TARGET_CPU_leon3, PROCESSOR_LEON3 },
    { TARGET_CPU_leon5, PROCESSOR_LEON5 },
    { TARGET_CPU_leon3v7, PROCESSOR_LEON3V7 },
    { TARGET_CPU_sparclite, PROCESSOR_F930 },
    { TARGET_CPU_sparclite86x, PROCESSOR_SPARCLITE86X },
    { TARGET_CPU_sparclet, PROCESSOR_TSC701 },
    { TARGET_CPU_v9, PROCESSOR_V9 },
    { TARGET_CPU_ultrasparc, PROCESSOR_ULTRASPARC },
    { TARGET_CPU_ultrasparc3, PROCESSOR_ULTRASPARC3 },
    { TARGET_CPU_niagara, PROCESSOR_NIAGARA },
    { TARGET_CPU_niagara2, PROCESSOR_NIAGARA2 },
    { TARGET_CPU_niagara3, PROCESSOR_NIAGARA3 },
    { TARGET_CPU_niagara4, PROCESSOR_NIAGARA4 },
    { TARGET_CPU_niagara7, PROCESSOR_NIAGARA7 },
    { TARGET_CPU_m8, PROCESSOR_M8 },
    { -1, PROCESSOR_V7 }
  };
  const struct cpu_default *def;
  /* Table of values for -m{cpu,tune}=.  This must match the order of
     the enum processor_type in sparc-opts.h.  */
  static struct cpu_table {
    const char *const name;
    const int disable;
    const int enable;
  } const cpu_table[] = {
    { "v7", MASK_ISA, 0 },
    { "cypress", MASK_ISA, 0 },
    { "v8", MASK_ISA, MASK_V8 },
    /* TI TMS390Z55 supersparc */
    { "supersparc", MASK_ISA, MASK_V8 },
    { "hypersparc", MASK_ISA, MASK_V8 },
    { "leon", MASK_ISA|MASK_FSMULD, MASK_V8|MASK_LEON },
    { "leon3", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon5", MASK_ISA, MASK_V8|MASK_LEON3 },
    { "leon3v7", MASK_ISA, MASK_LEON3 },
    { "sparclite", MASK_ISA, MASK_SPARCLITE },
    /* The Fujitsu MB86930 is the original sparclite chip, with no FPU.  */
    { "f930", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    /* The Fujitsu MB86934 is the more recent sparclite chip, with an FPU.  */
    { "f934", MASK_ISA, MASK_SPARCLITE },
    { "sparclite86x", MASK_ISA|MASK_FPU, MASK_SPARCLITE },
    { "sparclet", MASK_ISA, MASK_SPARCLET },
    /* TEMIC sparclet */
    { "tsc701", MASK_ISA, MASK_SPARCLET },
    { "v9", MASK_ISA, MASK_V9 },
    /* UltraSPARC I, II, IIi */
    { "ultrasparc", MASK_ISA,
      /* Although insns using %y are deprecated, it is a clear win.  */
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC III */
    /* ??? Check if %y issue still holds true.  */
    { "ultrasparc3", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS|MASK_VIS2 },
    /* UltraSPARC T1 */
    { "niagara", MASK_ISA,
      MASK_V9|MASK_DEPRECATED_V8_INSNS },
    /* UltraSPARC T2 */
    { "niagara2", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS2 },
    /* UltraSPARC T3 */
    { "niagara3", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF },
    /* UltraSPARC T4 */
    { "niagara4", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS3|MASK_FMAF|MASK_CBCOND },
    /* UltraSPARC M7 */
    { "niagara7", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4|MASK_FMAF|MASK_CBCOND|MASK_SUBXC },
    /* UltraSPARC M8 */
    { "m8", MASK_ISA,
      MASK_V9|MASK_POPC|MASK_VIS4B|MASK_FMAF|MASK_CBCOND|MASK_SUBXC }
  };
1789 const struct cpu_table *cpu;
1790 unsigned int i;
1792 if (sparc_debug_string != NULL)
1794 const char *q;
1795 char *p;
1797 p = ASTRDUP (sparc_debug_string);
1798 while ((q = strtok (p, ",")) != NULL)
1800 bool invert;
1801 int mask;
1803 p = NULL;
1804 if (*q == '!')
1806 invert = true;
1807 q++;
1809 else
1810 invert = false;
1812 if (! strcmp (q, "all"))
1813 mask = MASK_DEBUG_ALL;
1814 else if (! strcmp (q, "options"))
1815 mask = MASK_DEBUG_OPTIONS;
1816 else
1817 error ("unknown %<-mdebug-%s%> switch", q);
1819 if (invert)
1820 sparc_debug &= ~mask;
1821 else
1822 sparc_debug |= mask;
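/* For example, "-mdebug=all,!options" (assuming the -mdebug= spelling
   of the option) first sets MASK_DEBUG_ALL and then clears
   MASK_DEBUG_OPTIONS, since the comma-separated tokens are applied
   left to right by the loop above.  */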
1826 /* Enable the FsMULd instruction by default if not explicitly specified by
1827 the user. It may later be disabled by the CPU (explicitly or not). */
1828 if (TARGET_FPU && !(target_flags_explicit & MASK_FSMULD))
1829 target_flags |= MASK_FSMULD;
1831 if (TARGET_DEBUG_OPTIONS)
1833 dump_target_flags("Initial target_flags", target_flags);
1834 dump_target_flags("target_flags_explicit", target_flags_explicit);
1837 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1838 SUBTARGET_OVERRIDE_OPTIONS;
1839 #endif
1841 #ifndef SPARC_BI_ARCH
1842 /* Check for unsupported architecture size. */
1843 if (!TARGET_64BIT != DEFAULT_ARCH32_P)
1844 error ("%s is not supported by this configuration",
1845 DEFAULT_ARCH32_P ? "-m64" : "-m32");
1846 #endif
1848 /* We force all 64-bit archs to use a 128-bit long double. */
1849 if (TARGET_ARCH64 && !TARGET_LONG_DOUBLE_128)
1851 error ("%<-mlong-double-64%> not allowed with %<-m64%>");
1852 target_flags |= MASK_LONG_DOUBLE_128;
1855 /* Check that -fcall-saved-REG wasn't specified for out registers. */
1856 for (i = 8; i < 16; i++)
1857 if (!call_used_regs [i])
1859 error ("%<-fcall-saved-REG%> is not supported for out registers");
1860 call_used_regs [i] = 1;
1863 /* Set the default CPU if no -mcpu option was specified. */
1864 if (!OPTION_SET_P (sparc_cpu_and_features))
1866 for (def = &cpu_default[0]; def->cpu != -1; ++def)
1867 if (def->cpu == TARGET_CPU_DEFAULT)
1868 break;
1869 gcc_assert (def->cpu != -1);
1870 sparc_cpu_and_features = def->processor;
1873 /* Set the default CPU if no -mtune option was specified. */
1874 if (!OPTION_SET_P (sparc_cpu))
1875 sparc_cpu = sparc_cpu_and_features;
1877 cpu = &cpu_table[(int) sparc_cpu_and_features];
1879 if (TARGET_DEBUG_OPTIONS)
1881 fprintf (stderr, "sparc_cpu_and_features: %s\n", cpu->name);
1882 dump_target_flags ("cpu->disable", cpu->disable);
1883 dump_target_flags ("cpu->enable", cpu->enable);
1886 target_flags &= ~cpu->disable;
1887 target_flags |= (cpu->enable
1888 #ifndef HAVE_AS_FMAF_HPC_VIS3
1889 & ~(MASK_FMAF | MASK_VIS3)
1890 #endif
1891 #ifndef HAVE_AS_SPARC4
1892 & ~MASK_CBCOND
1893 #endif
1894 #ifndef HAVE_AS_SPARC5_VIS4
1895 & ~(MASK_VIS4 | MASK_SUBXC)
1896 #endif
1897 #ifndef HAVE_AS_SPARC6
1898 & ~(MASK_VIS4B)
1899 #endif
1900 #ifndef HAVE_AS_LEON
1901 & ~(MASK_LEON | MASK_LEON3)
1902 #endif
1903 & ~(target_flags_explicit & MASK_FEATURES)
1906 /* FsMULd is a V8 instruction. */
1907 if (!TARGET_V8 && !TARGET_V9)
1908 target_flags &= ~MASK_FSMULD;
1910 /* -mvis2 implies -mvis. */
1911 if (TARGET_VIS2)
1912 target_flags |= MASK_VIS;
1914 /* -mvis3 implies -mvis2 and -mvis. */
1915 if (TARGET_VIS3)
1916 target_flags |= MASK_VIS2 | MASK_VIS;
1918 /* -mvis4 implies -mvis3, -mvis2 and -mvis. */
1919 if (TARGET_VIS4)
1920 target_flags |= MASK_VIS3 | MASK_VIS2 | MASK_VIS;
1922 /* -mvis4b implies -mvis4, -mvis3, -mvis2 and -mvis. */
1923 if (TARGET_VIS4B)
1924 target_flags |= MASK_VIS4 | MASK_VIS3 | MASK_VIS2 | MASK_VIS;
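/* Taken together, the implications above form a chain:

     -mvis4b -> -mvis4 -> -mvis3 -> -mvis2 -> -mvis

   so specifying -mvis4b alone enables every earlier VIS level.  */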
1926 /* Don't allow -mvis, -mvis2, -mvis3, -mvis4, -mvis4b, -mfmaf and -mfsmuld if
1927 FPU is disabled. */
1928 if (!TARGET_FPU)
1929 target_flags &= ~(MASK_VIS | MASK_VIS2 | MASK_VIS3 | MASK_VIS4
1930 | MASK_VIS4B | MASK_FMAF | MASK_FSMULD);
1932 /* -mvis assumes UltraSPARC+, so we are sure v9 instructions
1933 are available; -m64 also implies v9. */
1934 if (TARGET_VIS || TARGET_ARCH64)
1936 target_flags |= MASK_V9;
1937 target_flags &= ~(MASK_V8 | MASK_SPARCLET | MASK_SPARCLITE);
1940 /* -mvis also implies -mv8plus on 32-bit. */
1941 if (TARGET_VIS && !TARGET_ARCH64)
1942 target_flags |= MASK_V8PLUS;
1944 /* Use the deprecated v8 insns for sparc64 in 32-bit mode. */
1945 if (TARGET_V9 && TARGET_ARCH32)
1946 target_flags |= MASK_DEPRECATED_V8_INSNS;
1948 /* V8PLUS requires V9 and makes no sense in 64-bit mode. */
1949 if (!TARGET_V9 || TARGET_ARCH64)
1950 target_flags &= ~MASK_V8PLUS;
1952 /* Don't use stack biasing in 32-bit mode. */
1953 if (TARGET_ARCH32)
1954 target_flags &= ~MASK_STACK_BIAS;
1956 /* Enable applicable errata workarounds for LEON3FT. */
1957 if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
1959 sparc_fix_b2bst = 1;
1960 sparc_fix_lost_divsqrt = 1;
1963 /* Disable FsMULd for the UT699 since it doesn't work correctly. */
1964 if (sparc_fix_ut699)
1965 target_flags &= ~MASK_FSMULD;
1967 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
1968 if (!(target_flags_explicit & MASK_LONG_DOUBLE_128))
1969 target_flags |= MASK_LONG_DOUBLE_128;
1970 #endif
1972 if (TARGET_DEBUG_OPTIONS)
1973 dump_target_flags ("Final target_flags", target_flags);
1975 /* Validate -mcmodel and set the code model if none was specified. */
1976 if (OPTION_SET_P (sparc_code_model))
1978 if (TARGET_ARCH32)
1979 error ("%<-mcmodel=%> is not supported in 32-bit mode");
1981 else
1983 if (TARGET_ARCH32)
1984 sparc_code_model = CM_32;
1985 else
1986 sparc_code_model = SPARC_DEFAULT_CMODEL;
1989 /* Set the memory model if no -mmemory-model option was specified. */
1990 if (!OPTION_SET_P (sparc_memory_model))
1992 /* Choose the memory model for the operating system. */
1993 enum sparc_memory_model_type os_default = SUBTARGET_DEFAULT_MEMORY_MODEL;
1994 if (os_default != SMM_DEFAULT)
1995 sparc_memory_model = os_default;
1996 /* Choose the most relaxed model for the processor. */
1997 else if (TARGET_V9)
1998 sparc_memory_model = SMM_RMO;
1999 else if (TARGET_LEON3)
2000 sparc_memory_model = SMM_TSO;
2001 else if (TARGET_LEON)
2002 sparc_memory_model = SMM_SC;
2003 else if (TARGET_V8)
2004 sparc_memory_model = SMM_PSO;
2005 else
2006 sparc_memory_model = SMM_SC;
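/* In other words, absent an OS-mandated default the most relaxed safe
   model is picked: V9 -> RMO, LEON3 -> TSO, other LEON -> SC,
   other V8 -> PSO, and SC for everything else.  */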
2009 /* Supply a default value for align_functions. */
2010 if (flag_align_functions && !str_align_functions)
2012 if (sparc_cpu == PROCESSOR_ULTRASPARC
2013 || sparc_cpu == PROCESSOR_ULTRASPARC3
2014 || sparc_cpu == PROCESSOR_NIAGARA
2015 || sparc_cpu == PROCESSOR_NIAGARA2
2016 || sparc_cpu == PROCESSOR_NIAGARA3
2017 || sparc_cpu == PROCESSOR_NIAGARA4)
2018 str_align_functions = "32";
2019 else if (sparc_cpu == PROCESSOR_NIAGARA7
2020 || sparc_cpu == PROCESSOR_M8)
2021 str_align_functions = "64";
2024 /* Validate PCC_STRUCT_RETURN. */
2025 if (flag_pcc_struct_return == DEFAULT_PCC_STRUCT_RETURN)
2026 flag_pcc_struct_return = (TARGET_ARCH64 ? 0 : 1);
2028 /* Only use .uaxword when compiling for a 64-bit target. */
2029 if (!TARGET_ARCH64)
2030 targetm.asm_out.unaligned_op.di = NULL;
2032 /* Set the processor costs. */
2033 switch (sparc_cpu)
2035 case PROCESSOR_V7:
2036 case PROCESSOR_CYPRESS:
2037 sparc_costs = &cypress_costs;
2038 break;
2039 case PROCESSOR_V8:
2040 case PROCESSOR_SPARCLITE:
2041 case PROCESSOR_SUPERSPARC:
2042 sparc_costs = &supersparc_costs;
2043 break;
2044 case PROCESSOR_F930:
2045 case PROCESSOR_F934:
2046 case PROCESSOR_HYPERSPARC:
2047 case PROCESSOR_SPARCLITE86X:
2048 sparc_costs = &hypersparc_costs;
2049 break;
2050 case PROCESSOR_LEON:
2051 sparc_costs = &leon_costs;
2052 break;
2053 case PROCESSOR_LEON3:
2054 case PROCESSOR_LEON3V7:
2055 sparc_costs = &leon3_costs;
2056 break;
2057 case PROCESSOR_LEON5:
2058 sparc_costs = &leon5_costs;
2059 break;
2060 case PROCESSOR_SPARCLET:
2061 case PROCESSOR_TSC701:
2062 sparc_costs = &sparclet_costs;
2063 break;
2064 case PROCESSOR_V9:
2065 case PROCESSOR_ULTRASPARC:
2066 sparc_costs = &ultrasparc_costs;
2067 break;
2068 case PROCESSOR_ULTRASPARC3:
2069 sparc_costs = &ultrasparc3_costs;
2070 break;
2071 case PROCESSOR_NIAGARA:
2072 sparc_costs = &niagara_costs;
2073 break;
2074 case PROCESSOR_NIAGARA2:
2075 sparc_costs = &niagara2_costs;
2076 break;
2077 case PROCESSOR_NIAGARA3:
2078 sparc_costs = &niagara3_costs;
2079 break;
2080 case PROCESSOR_NIAGARA4:
2081 sparc_costs = &niagara4_costs;
2082 break;
2083 case PROCESSOR_NIAGARA7:
2084 sparc_costs = &niagara7_costs;
2085 break;
2086 case PROCESSOR_M8:
2087 sparc_costs = &m8_costs;
2088 break;
2089 case PROCESSOR_NATIVE:
2090 gcc_unreachable ();
2093 /* param_simultaneous_prefetches is the number of prefetches that
2094 can run at the same time. More important, it is the threshold
2095 defining when additional prefetches will be dropped by the
2096 hardware.
2098 The UltraSPARC-III features a documented prefetch queue with a
2099 size of 8. Additional prefetches issued in the cpu are
2100 dropped.
2102 Niagara processors are different. In these processors prefetches
2103 are handled much like regular loads. The L1 miss buffer is 32
2104 entries, but prefetches start getting affected when 30 entries
2105 become occupied. That occupation could be a mix of regular loads
2106 and prefetches though. And that buffer is shared by all threads.
2107 Once the threshold is reached, if the core is running a single
2108 thread the prefetch will retry. If more than one thread is
2109 running, the prefetch will be dropped.
2111 All this makes it very difficult to determine how many
2112 prefetches can be in flight simultaneously, even in a
2113 single-threaded program. Experimental results show that setting
2114 this parameter to 32 works well when the number of threads is not
2115 high. */
2116 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2117 param_simultaneous_prefetches,
2118 ((sparc_cpu == PROCESSOR_ULTRASPARC
2119 || sparc_cpu == PROCESSOR_NIAGARA
2120 || sparc_cpu == PROCESSOR_NIAGARA2
2121 || sparc_cpu == PROCESSOR_NIAGARA3
2122 || sparc_cpu == PROCESSOR_NIAGARA4)
2123 ? 2
2124 : (sparc_cpu == PROCESSOR_ULTRASPARC3
2125 ? 8 : ((sparc_cpu == PROCESSOR_NIAGARA7
2126 || sparc_cpu == PROCESSOR_M8)
2127 ? 32 : 3))));
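/* In summary, the default chosen above is 2 for UltraSPARC-I/II and the
   Niagara-1/2/3/4 line, 8 for UltraSPARC-III, 32 for Niagara-7 and M8,
   and 3 for everything else.  */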
2129 /* param_l1_cache_line_size is the size of the L1 cache line, in
2130 bytes.
2132 The Oracle SPARC Architecture (previously the UltraSPARC
2133 Architecture) specification states that when a PREFETCH[A]
2134 instruction is executed an implementation-specific amount of data
2135 is prefetched, and that it is at least 64 bytes long (aligned to
2136 at least 64 bytes).
2138 However, this is not correct. The M7 (and implementations prior
2139 to that) does not guarantee a 64B prefetch into a cache if the
2140 line size is smaller. A single cache line is all that is ever
2141 prefetched. So for the M7, where the L1D$ has 32B lines and the
2142 L2D$ and L3 have 64B lines, a prefetch will prefetch 64B into the
2143 L2 and L3, but only 32B are brought into the L1D$. (Assuming it
2144 is a read_n prefetch, which is the only type which allocates to
2145 the L1.) */
2146 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2147 param_l1_cache_line_size,
2148 (sparc_cpu == PROCESSOR_M8 ? 64 : 32));
2150 /* param_l1_cache_size is the size of the L1D$ (most SPARC chips use
2151 Harvard level-1 caches) in kilobytes. Both UltraSPARC and
2152 Niagara processors feature a L1D$ of 16KB. */
2153 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2154 param_l1_cache_size,
2155 ((sparc_cpu == PROCESSOR_ULTRASPARC
2156 || sparc_cpu == PROCESSOR_ULTRASPARC3
2157 || sparc_cpu == PROCESSOR_NIAGARA
2158 || sparc_cpu == PROCESSOR_NIAGARA2
2159 || sparc_cpu == PROCESSOR_NIAGARA3
2160 || sparc_cpu == PROCESSOR_NIAGARA4
2161 || sparc_cpu == PROCESSOR_NIAGARA7
2162 || sparc_cpu == PROCESSOR_M8)
2163 ? 16 : 64));
2165 /* param_l2_cache_size is the size of the L2 in kilobytes. Note
2166 that 512 is the default in params.def. */
2167 SET_OPTION_IF_UNSET (&global_options, &global_options_set,
2168 param_l2_cache_size,
2169 ((sparc_cpu == PROCESSOR_NIAGARA4
2170 || sparc_cpu == PROCESSOR_M8)
2171 ? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
2172 ? 256 : 512)));
2175 /* Disable save slot sharing for call-clobbered registers by default.
2176 The IRA sharing algorithm works on single registers only and this
2177 pessimizes for double floating-point registers. */
2178 if (!OPTION_SET_P (flag_ira_share_save_slots))
2179 flag_ira_share_save_slots = 0;
2181 /* Only enable REE by default in 64-bit mode where it helps to eliminate
2182 redundant 32-to-64-bit extensions. */
2183 if (!OPTION_SET_P (flag_ree) && TARGET_ARCH32)
2184 flag_ree = 0;
2186 /* Do various machine dependent initializations. */
2187 sparc_init_modes ();
2189 /* Set up function hooks. */
2190 init_machine_status = sparc_init_machine_status;
2193 /* Miscellaneous utilities. */
2195 /* Nonzero if CODE, a comparison, is suitable for use in v9 conditional move
2196 or branch on register contents instructions. */
2198 int
2199 v9_regcmp_p (enum rtx_code code)
2201 return (code == EQ || code == NE || code == GE || code == LT
2202 || code == LE || code == GT);
2205 /* Nonzero if OP is a floating point constant which can
2206 be loaded into an integer register using a single
2207 sethi instruction. */
2209 int
2210 fp_sethi_p (rtx op)
2212 if (GET_CODE (op) == CONST_DOUBLE)
2214 long i;
2216 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2217 return !SPARC_SIMM13_P (i) && SPARC_SETHI_P (i);
2220 return 0;
2223 /* Nonzero if OP is a floating point constant which can
2224 be loaded into an integer register using a single
2225 mov instruction. */
2227 int
2228 fp_mov_p (rtx op)
2230 if (GET_CODE (op) == CONST_DOUBLE)
2232 long i;
2234 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2235 return SPARC_SIMM13_P (i);
2238 return 0;
2241 /* Nonzero if OP is a floating point constant which can
2242 be loaded into an integer register using a high/losum
2243 instruction sequence. */
2245 int
2246 fp_high_losum_p (rtx op)
2248 /* The constraints calling this should only be in
2249 SFmode move insns, so any constant which cannot
2250 be moved using a single insn will do. */
2251 if (GET_CODE (op) == CONST_DOUBLE)
2253 long i;
2255 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (op), i);
2256 return !SPARC_SIMM13_P (i) && !SPARC_SETHI_P (i);
2259 return 0;
2262 /* Return true if the address of LABEL can be loaded by means of the
2263 mov{si,di}_pic_label_ref patterns in PIC mode. */
2265 static bool
2266 can_use_mov_pic_label_ref (rtx label)
2268 /* VxWorks does not impose a fixed gap between segments; the run-time
2269 gap can be different from the object-file gap. We therefore can't
2270 assume X - _GLOBAL_OFFSET_TABLE_ is a link-time constant unless we
2271 are absolutely sure that X is in the same segment as the GOT.
2272 Unfortunately, the flexibility of linker scripts means that we
2273 can't be sure of that in general, so assume that GOT-relative
2274 accesses are never valid on VxWorks. */
2275 if (TARGET_VXWORKS_RTP)
2276 return false;
2278 /* Similarly, if the label is non-local, it might end up being placed
2279 in a different section than the current one; now mov_pic_label_ref
2280 requires the label and the code to be in the same section. */
2281 if (LABEL_REF_NONLOCAL_P (label))
2282 return false;
2284 /* Finally, if we are reordering basic blocks and partition into hot
2285 and cold sections, this might happen for any label. */
2286 if (flag_reorder_blocks_and_partition)
2287 return false;
2289 return true;
2292 /* Expand a move instruction. Return true if all work is done. */
2294 bool
2295 sparc_expand_move (machine_mode mode, rtx *operands)
2297 /* Handle sets of MEM first. */
2298 if (GET_CODE (operands[0]) == MEM)
2300 /* 0 is a register (or a pair of registers) on SPARC. */
2301 if (register_or_zero_operand (operands[1], mode))
2302 return false;
2304 if (!reload_in_progress)
2306 operands[0] = validize_mem (operands[0]);
2307 operands[1] = force_reg (mode, operands[1]);
2311 /* Fix up TLS cases. */
2312 if (TARGET_HAVE_TLS
2313 && CONSTANT_P (operands[1])
2314 && sparc_tls_referenced_p (operands [1]))
2316 operands[1] = sparc_legitimize_tls_address (operands[1]);
2317 return false;
2320 /* Fix up PIC cases. */
2321 if (flag_pic && CONSTANT_P (operands[1]))
2323 if (pic_address_needs_scratch (operands[1]))
2324 operands[1] = sparc_legitimize_pic_address (operands[1], NULL_RTX);
2326 /* We cannot use the mov{si,di}_pic_label_ref patterns in all cases. */
2327 if ((GET_CODE (operands[1]) == LABEL_REF
2328 && can_use_mov_pic_label_ref (operands[1]))
2329 || (GET_CODE (operands[1]) == CONST
2330 && GET_CODE (XEXP (operands[1], 0)) == PLUS
2331 && GET_CODE (XEXP (XEXP (operands[1], 0), 0)) == LABEL_REF
2332 && GET_CODE (XEXP (XEXP (operands[1], 0), 1)) == CONST_INT
2333 && can_use_mov_pic_label_ref (XEXP (XEXP (operands[1], 0), 0))))
2335 if (mode == SImode)
2337 emit_insn (gen_movsi_pic_label_ref (operands[0], operands[1]));
2338 return true;
2341 if (mode == DImode)
2343 emit_insn (gen_movdi_pic_label_ref (operands[0], operands[1]));
2344 return true;
2348 if (symbolic_operand (operands[1], mode))
2350 operands[1]
2351 = sparc_legitimize_pic_address (operands[1],
2352 reload_in_progress
2353 ? operands[0] : NULL_RTX);
2354 return false;
2358 /* If we are trying to toss an integer constant into FP registers,
2359 or loading a FP or vector constant, force it into memory. */
2360 if (CONSTANT_P (operands[1])
2361 && REG_P (operands[0])
2362 && (SPARC_FP_REG_P (REGNO (operands[0]))
2363 || SCALAR_FLOAT_MODE_P (mode)
2364 || VECTOR_MODE_P (mode)))
2366 /* emit_group_store will send such bogosity to us when it is
2367 not storing directly into memory. So fix this up to avoid
2368 crashes in output_constant_pool. */
2369 if (operands [1] == const0_rtx)
2370 operands[1] = CONST0_RTX (mode);
2372 /* We can clear FP registers, or set them to all ones, if TARGET_VIS,
2373 and we can always do either for the other regs. */
2374 if ((TARGET_VIS || REGNO (operands[0]) < SPARC_FIRST_FP_REG)
2375 && (const_zero_operand (operands[1], mode)
2376 || const_all_ones_operand (operands[1], mode)))
2377 return false;
2379 if (REGNO (operands[0]) < SPARC_FIRST_FP_REG
2380 /* We are able to build any SF constant in integer registers
2381 with at most 2 instructions. */
2382 && (mode == SFmode
2383 /* And any DF constant in integer registers if needed. */
2384 || (mode == DFmode && !can_create_pseudo_p ())))
2385 return false;
2387 operands[1] = force_const_mem (mode, operands[1]);
2388 if (!reload_in_progress)
2389 operands[1] = validize_mem (operands[1]);
2390 return false;
2393 /* Accept non-constants and valid constants unmodified. */
2394 if (!CONSTANT_P (operands[1])
2395 || GET_CODE (operands[1]) == HIGH
2396 || input_operand (operands[1], mode))
2397 return false;
2399 switch (mode)
2401 case E_QImode:
2402 /* All QImode constants require only one insn, so proceed. */
2403 break;
2405 case E_HImode:
2406 case E_SImode:
2407 sparc_emit_set_const32 (operands[0], operands[1]);
2408 return true;
2410 case E_DImode:
2411 /* input_operand should have filtered out 32-bit mode. */
2412 sparc_emit_set_const64 (operands[0], operands[1]);
2413 return true;
2415 case E_TImode:
2417 rtx high, low;
2418 /* TImode isn't available in 32-bit mode. */
2419 split_double (operands[1], &high, &low);
2420 emit_insn (gen_movdi (operand_subword (operands[0], 0, 0, TImode),
2421 high));
2422 emit_insn (gen_movdi (operand_subword (operands[0], 1, 0, TImode),
2423 low));
2425 return true;
2427 default:
2428 gcc_unreachable ();
2431 return false;
2434 /* Load OP1, a 32-bit constant, into OP0, a register.
2435 We know it can't be done in one insn when we get
2436 here; the move expander guarantees this. */
2438 static void
2439 sparc_emit_set_const32 (rtx op0, rtx op1)
2441 machine_mode mode = GET_MODE (op0);
2442 rtx temp = op0;
2444 if (can_create_pseudo_p ())
2445 temp = gen_reg_rtx (mode);
2447 if (GET_CODE (op1) == CONST_INT)
2449 gcc_assert (!small_int_operand (op1, mode)
2450 && !const_high_operand (op1, mode));
2452 /* Emit them as real moves instead of a HIGH/LO_SUM;
2453 this way CSE can see everything and reuse intermediate
2454 values if it wants. */
2455 emit_insn (gen_rtx_SET (temp, GEN_INT (INTVAL (op1)
2456 & ~(HOST_WIDE_INT) 0x3ff)));
2458 emit_insn (gen_rtx_SET (op0,
2459 gen_rtx_IOR (mode, temp,
2460 GEN_INT (INTVAL (op1) & 0x3ff))));
2462 else
2464 /* A symbol: emit it in the traditional way. */
2465 emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (mode, op1)));
2466 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (mode, temp, op1)));
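/* A minimal sketch of the CONST_INT path above, for op1 = 0x12345678:

     (set (reg:SI temp) (const_int 0x12345400))          ; sethi %hi(0x12345678)
     (set (reg:SI op0) (ior:SI temp (const_int 0x278)))  ; or %temp, 0x278, %op0

   Both constants stay visible to CSE, unlike a HIGH/LO_SUM pair.  */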
2470 /* Load OP1, a symbolic 64-bit constant, into OP0, a DImode register.
2471 If TEMP is nonzero, we are forbidden to use any other scratch
2472 registers. Otherwise, we are allowed to generate them as needed.
2474 Note that TEMP may have TImode if the code model is TARGET_CM_MEDANY
2475 or TARGET_CM_EMBMEDANY (see the reload_indi and reload_outdi patterns). */
2477 void
2478 sparc_emit_set_symbolic_const64 (rtx op0, rtx op1, rtx temp)
2480 rtx cst, temp1, temp2, temp3, temp4, temp5;
2481 rtx ti_temp = 0;
2483 /* Deal with too large offsets. */
2484 if (GET_CODE (op1) == CONST
2485 && GET_CODE (XEXP (op1, 0)) == PLUS
2486 && CONST_INT_P (cst = XEXP (XEXP (op1, 0), 1))
2487 && trunc_int_for_mode (INTVAL (cst), SImode) != INTVAL (cst))
2489 gcc_assert (!temp);
2490 temp1 = gen_reg_rtx (DImode);
2491 temp2 = gen_reg_rtx (DImode);
2492 sparc_emit_set_const64 (temp2, cst);
2493 sparc_emit_set_symbolic_const64 (temp1, XEXP (XEXP (op1, 0), 0),
2494 NULL_RTX);
2495 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp1, temp2)));
2496 return;
2499 if (temp && GET_MODE (temp) == TImode)
2501 ti_temp = temp;
2502 temp = gen_rtx_REG (DImode, REGNO (temp));
2505 /* SPARC-V9 code model support. */
2506 switch (sparc_code_model)
2508 case CM_MEDLOW:
2509 /* The range spanned by all instructions in the object is less
2510 than 2^31 bytes (2GB) and the distance from any instruction
2511 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2512 than 2^31 bytes (2GB).
2514 The executable must be in the low 4TB of the virtual address
2515 space.
2517 sethi %hi(symbol), %temp1
2518 or %temp1, %lo(symbol), %reg */
2519 if (temp)
2520 temp1 = temp; /* op0 is allowed. */
2521 else
2522 temp1 = gen_reg_rtx (DImode);
2524 emit_insn (gen_rtx_SET (temp1, gen_rtx_HIGH (DImode, op1)));
2525 emit_insn (gen_rtx_SET (op0, gen_rtx_LO_SUM (DImode, temp1, op1)));
2526 break;
2528 case CM_MEDMID:
2529 /* The range spanned by all instructions in the object is less
2530 than 2^31 bytes (2GB) and the distance from any instruction
2531 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2532 than 2^31 bytes (2GB).
2534 The executable must be in the low 16TB of the virtual address
2535 space.
2537 sethi %h44(symbol), %temp1
2538 or %temp1, %m44(symbol), %temp2
2539 sllx %temp2, 12, %temp3
2540 or %temp3, %l44(symbol), %reg */
2541 if (temp)
2543 temp1 = op0;
2544 temp2 = op0;
2545 temp3 = temp; /* op0 is allowed. */
2547 else
2549 temp1 = gen_reg_rtx (DImode);
2550 temp2 = gen_reg_rtx (DImode);
2551 temp3 = gen_reg_rtx (DImode);
2554 emit_insn (gen_seth44 (temp1, op1));
2555 emit_insn (gen_setm44 (temp2, temp1, op1));
2556 emit_insn (gen_rtx_SET (temp3,
2557 gen_rtx_ASHIFT (DImode, temp2, GEN_INT (12))));
2558 emit_insn (gen_setl44 (op0, temp3, op1));
2559 break;
2561 case CM_MEDANY:
2562 /* The range spanned by all instructions in the object is less
2563 than 2^31 bytes (2GB) and the distance from any instruction
2564 to the location of the label _GLOBAL_OFFSET_TABLE_ is less
2565 than 2^31 bytes (2GB).
2567 The executable can be placed anywhere in the virtual address
2568 space.
2570 sethi %hh(symbol), %temp1
2571 sethi %lm(symbol), %temp2
2572 or %temp1, %hm(symbol), %temp3
2573 sllx %temp3, 32, %temp4
2574 or %temp4, %temp2, %temp5
2575 or %temp5, %lo(symbol), %reg */
2576 if (temp)
2578 /* It is possible that one of the registers we got for operands[2]
2579 might coincide with that of operands[0] (which is why we made
2580 it TImode). Pick the other one to use as our scratch. */
2581 if (rtx_equal_p (temp, op0))
2583 gcc_assert (ti_temp);
2584 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2586 temp1 = op0;
2587 temp2 = temp; /* op0 is _not_ allowed, see above. */
2588 temp3 = op0;
2589 temp4 = op0;
2590 temp5 = op0;
2592 else
2594 temp1 = gen_reg_rtx (DImode);
2595 temp2 = gen_reg_rtx (DImode);
2596 temp3 = gen_reg_rtx (DImode);
2597 temp4 = gen_reg_rtx (DImode);
2598 temp5 = gen_reg_rtx (DImode);
2601 emit_insn (gen_sethh (temp1, op1));
2602 emit_insn (gen_setlm (temp2, op1));
2603 emit_insn (gen_sethm (temp3, temp1, op1));
2604 emit_insn (gen_rtx_SET (temp4,
2605 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2606 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2607 emit_insn (gen_setlo (op0, temp5, op1));
2608 break;
2610 case CM_EMBMEDANY:
2611 Old old old backwards-compatibility cruft here.
2612 Essentially it is MEDLOW with a fixed 64-bit
2613 virtual base added to all data segment addresses.
2614 Text-segment stuff is computed like MEDANY; we can't
2615 reuse the code above because the relocation knobs
2616 look different.
2618 Data segment: sethi %hi(symbol), %temp1
2619 add %temp1, EMBMEDANY_BASE_REG, %temp2
2620 or %temp2, %lo(symbol), %reg */
2621 if (data_segment_operand (op1, GET_MODE (op1)))
2623 if (temp)
2625 temp1 = temp; /* op0 is allowed. */
2626 temp2 = op0;
2628 else
2630 temp1 = gen_reg_rtx (DImode);
2631 temp2 = gen_reg_rtx (DImode);
2634 emit_insn (gen_embmedany_sethi (temp1, op1));
2635 emit_insn (gen_embmedany_brsum (temp2, temp1));
2636 emit_insn (gen_embmedany_losum (op0, temp2, op1));
2639 /* Text segment: sethi %uhi(symbol), %temp1
2640 sethi %hi(symbol), %temp2
2641 or %temp1, %ulo(symbol), %temp3
2642 sllx %temp3, 32, %temp4
2643 or %temp4, %temp2, %temp5
2644 or %temp5, %lo(symbol), %reg */
2645 else
2647 if (temp)
2649 /* It is possible that one of the registers we got for operands[2]
2650 might coincide with that of operands[0] (which is why we made
2651 it TImode). Pick the other one to use as our scratch. */
2652 if (rtx_equal_p (temp, op0))
2654 gcc_assert (ti_temp);
2655 temp = gen_rtx_REG (DImode, REGNO (temp) + 1);
2657 temp1 = op0;
2658 temp2 = temp; /* op0 is _not_ allowed, see above. */
2659 temp3 = op0;
2660 temp4 = op0;
2661 temp5 = op0;
2663 else
2665 temp1 = gen_reg_rtx (DImode);
2666 temp2 = gen_reg_rtx (DImode);
2667 temp3 = gen_reg_rtx (DImode);
2668 temp4 = gen_reg_rtx (DImode);
2669 temp5 = gen_reg_rtx (DImode);
2672 emit_insn (gen_embmedany_textuhi (temp1, op1));
2673 emit_insn (gen_embmedany_texthi (temp2, op1));
2674 emit_insn (gen_embmedany_textulo (temp3, temp1, op1));
2675 emit_insn (gen_rtx_SET (temp4,
2676 gen_rtx_ASHIFT (DImode, temp3, GEN_INT (32))));
2677 emit_insn (gen_rtx_SET (temp5, gen_rtx_PLUS (DImode, temp4, temp2)));
2678 emit_insn (gen_embmedany_textlo (op0, temp5, op1));
2680 break;
2682 default:
2683 gcc_unreachable ();
2687 /* These avoid problems when cross compiling. If we do not
2688 go through all this hair then the optimizer will see
2689 invalid REG_EQUAL notes or in some cases none at all. */
2690 static rtx gen_safe_HIGH64 (rtx, HOST_WIDE_INT);
2691 static rtx gen_safe_SET64 (rtx, HOST_WIDE_INT);
2692 static rtx gen_safe_OR64 (rtx, HOST_WIDE_INT);
2693 static rtx gen_safe_XOR64 (rtx, HOST_WIDE_INT);
2695 /* The optimizer is not to assume anything about exactly
2696 which bits are set for a HIGH; they are unspecified.
2697 Unfortunately this leads to many missed optimizations
2698 during CSE. We mask out the non-HIGH bits so the result
2699 matches a plain movdi, to alleviate this problem.
2700 static rtx
2701 gen_safe_HIGH64 (rtx dest, HOST_WIDE_INT val)
2703 return gen_rtx_SET (dest, GEN_INT (val & ~(HOST_WIDE_INT)0x3ff));
2706 static rtx
2707 gen_safe_SET64 (rtx dest, HOST_WIDE_INT val)
2709 return gen_rtx_SET (dest, GEN_INT (val));
2712 static rtx
2713 gen_safe_OR64 (rtx src, HOST_WIDE_INT val)
2715 return gen_rtx_IOR (DImode, src, GEN_INT (val));
2718 static rtx
2719 gen_safe_XOR64 (rtx src, HOST_WIDE_INT val)
2721 return gen_rtx_XOR (DImode, src, GEN_INT (val));
2724 /* Worker routines for 64-bit constant formation on arch64.
2725 One of the key things to do in these emissions is
2726 to create as many temp REGs as possible. This makes it
2727 possible for half-built constants to be used later when
2728 such values are similar to something required later on.
2729 Without doing this, the optimizer cannot see such
2730 opportunities. */
2732 static void sparc_emit_set_const64_quick1 (rtx, rtx,
2733 unsigned HOST_WIDE_INT, int);
2735 static void
2736 sparc_emit_set_const64_quick1 (rtx op0, rtx temp,
2737 unsigned HOST_WIDE_INT low_bits, int is_neg)
2739 unsigned HOST_WIDE_INT high_bits;
2741 if (is_neg)
2742 high_bits = (~low_bits) & 0xffffffff;
2743 else
2744 high_bits = low_bits;
2746 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2747 if (!is_neg)
2749 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2751 else
2753 /* If we are XOR'ing with -1, then we should emit a one's complement
2754 instead. This way the combiner will notice logical operations
2755 such as ANDN later on and substitute. */
2756 if ((low_bits & 0x3ff) == 0x3ff)
2758 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
2760 else
2762 emit_insn (gen_rtx_SET (op0,
2763 gen_safe_XOR64 (temp,
2764 (-(HOST_WIDE_INT)0x400
2765 | (low_bits & 0x3ff)))));
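/* A worked example of the is_neg path: for the constant -4096
   (0xfffffffffffff000), high_bits = ~0xfffff000 & 0xffffffff = 0xfff,
   so the sequence is roughly

     sethi %hi(0xfff), %temp       ! temp = 0xc00
     xor   %temp, -0x400, %op0     ! 0xc00 ^ 0xfffffffffffffc00 = -4096

   (assembly shown for illustration; the code emits the equivalent RTL).  */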
2770 static void sparc_emit_set_const64_quick2 (rtx, rtx, unsigned HOST_WIDE_INT,
2771 unsigned HOST_WIDE_INT, int);
2773 static void
2774 sparc_emit_set_const64_quick2 (rtx op0, rtx temp,
2775 unsigned HOST_WIDE_INT high_bits,
2776 unsigned HOST_WIDE_INT low_immediate,
2777 int shift_count)
2779 rtx temp2 = op0;
2781 if ((high_bits & 0xfffffc00) != 0)
2783 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2784 if ((high_bits & ~0xfffffc00) != 0)
2785 emit_insn (gen_rtx_SET (op0,
2786 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2787 else
2788 temp2 = temp;
2790 else
2792 emit_insn (gen_safe_SET64 (temp, high_bits));
2793 temp2 = temp;
2796 /* Now shift it up into place. */
2797 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp2,
2798 GEN_INT (shift_count))));
2800 /* If there is a low immediate piece, finish up by
2801 putting that in as well. */
2802 if (low_immediate != 0)
2803 emit_insn (gen_rtx_SET (op0, gen_safe_OR64 (op0, low_immediate)));
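/* E.g., with high_bits = 0x12345678, low_immediate = 0 and
   shift_count = 32, this produces the classic "sethi; or; sllx" shape:

     sethi %hi(0x12345678), %temp
     or    %temp, 0x278, %op0
     sllx  %op0, 32, %op0

   yielding 0x1234567800000000 (an illustrative sketch of the RTL above).  */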
2806 static void sparc_emit_set_const64_longway (rtx, rtx, unsigned HOST_WIDE_INT,
2807 unsigned HOST_WIDE_INT);
2809 /* Full 64-bit constant decomposition. Even though this is the
2810 'worst' case, we still optimize a few things away. */
2811 static void
2812 sparc_emit_set_const64_longway (rtx op0, rtx temp,
2813 unsigned HOST_WIDE_INT high_bits,
2814 unsigned HOST_WIDE_INT low_bits)
2816 rtx sub_temp = op0;
2818 if (can_create_pseudo_p ())
2819 sub_temp = gen_reg_rtx (DImode);
2821 if ((high_bits & 0xfffffc00) != 0)
2823 emit_insn (gen_safe_HIGH64 (temp, high_bits));
2824 if ((high_bits & ~0xfffffc00) != 0)
2825 emit_insn (gen_rtx_SET (sub_temp,
2826 gen_safe_OR64 (temp, (high_bits & 0x3ff))));
2827 else
2828 sub_temp = temp;
2830 else
2832 emit_insn (gen_safe_SET64 (temp, high_bits));
2833 sub_temp = temp;
2836 if (can_create_pseudo_p ())
2838 rtx temp2 = gen_reg_rtx (DImode);
2839 rtx temp3 = gen_reg_rtx (DImode);
2840 rtx temp4 = gen_reg_rtx (DImode);
2842 emit_insn (gen_rtx_SET (temp4, gen_rtx_ASHIFT (DImode, sub_temp,
2843 GEN_INT (32))));
2845 emit_insn (gen_safe_HIGH64 (temp2, low_bits));
2846 if ((low_bits & ~0xfffffc00) != 0)
2848 emit_insn (gen_rtx_SET (temp3,
2849 gen_safe_OR64 (temp2, (low_bits & 0x3ff))));
2850 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp3)));
2852 else
2854 emit_insn (gen_rtx_SET (op0, gen_rtx_PLUS (DImode, temp4, temp2)));
2857 else
2859 rtx low1 = GEN_INT ((low_bits >> (32 - 12)) & 0xfff);
2860 rtx low2 = GEN_INT ((low_bits >> (32 - 12 - 12)) & 0xfff);
2861 rtx low3 = GEN_INT ((low_bits >> (32 - 12 - 12 - 8)) & 0x0ff);
2862 int to_shift = 12;
2864 /* We are in the middle of reload, so this is really
2865 painful. However we do still make an attempt to
2866 avoid emitting truly stupid code. */
2867 if (low1 != const0_rtx)
2869 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2870 GEN_INT (to_shift))));
2871 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low1)));
2872 sub_temp = op0;
2873 to_shift = 12;
2875 else
2877 to_shift += 12;
2879 if (low2 != const0_rtx)
2881 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2882 GEN_INT (to_shift))));
2883 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low2)));
2884 sub_temp = op0;
2885 to_shift = 8;
2887 else
2889 to_shift += 8;
2891 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, sub_temp,
2892 GEN_INT (to_shift))));
2893 if (low3 != const0_rtx)
2894 emit_insn (gen_rtx_SET (op0, gen_rtx_IOR (DImode, op0, low3)));
2895 /* phew... */
2899 /* Analyze a 64-bit constant for certain properties. */
2900 static void analyze_64bit_constant (unsigned HOST_WIDE_INT,
2901 unsigned HOST_WIDE_INT,
2902 int *, int *, int *);
2904 static void
2905 analyze_64bit_constant (unsigned HOST_WIDE_INT high_bits,
2906 unsigned HOST_WIDE_INT low_bits,
2907 int *hbsp, int *lbsp, int *abbasp)
2909 int lowest_bit_set, highest_bit_set, all_bits_between_are_set;
2910 int i;
2912 lowest_bit_set = highest_bit_set = -1;
2913 i = 0;
2914 do
2916 if ((lowest_bit_set == -1)
2917 && ((low_bits >> i) & 1))
2918 lowest_bit_set = i;
2919 if ((highest_bit_set == -1)
2920 && ((high_bits >> (32 - i - 1)) & 1))
2921 highest_bit_set = (64 - i - 1);
2923 while (++i < 32
2924 && ((highest_bit_set == -1)
2925 || (lowest_bit_set == -1)));
2926 if (i == 32)
2928 i = 0;
2929 do
2931 if ((lowest_bit_set == -1)
2932 && ((high_bits >> i) & 1))
2933 lowest_bit_set = i + 32;
2934 if ((highest_bit_set == -1)
2935 && ((low_bits >> (32 - i - 1)) & 1))
2936 highest_bit_set = 32 - i - 1;
2938 while (++i < 32
2939 && ((highest_bit_set == -1)
2940 || (lowest_bit_set == -1)));
2942 /* If there are no bits set this should have gone out
2943 as one instruction! */
2944 gcc_assert (lowest_bit_set != -1 && highest_bit_set != -1);
2945 all_bits_between_are_set = 1;
2946 for (i = lowest_bit_set; i <= highest_bit_set; i++)
2948 if (i < 32)
2950 if ((low_bits & (1 << i)) != 0)
2951 continue;
2953 else
2955 if ((high_bits & (1 << (i - 32))) != 0)
2956 continue;
2958 all_bits_between_are_set = 0;
2959 break;
2961 *hbsp = highest_bit_set;
2962 *lbsp = lowest_bit_set;
2963 *abbasp = all_bits_between_are_set;
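/* For instance, for the constant 0x0000030000000000 (only bits 40 and 41
   set) this computes *hbsp = 41, *lbsp = 40 and *abbasp = 1.  */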
2966 static int const64_is_2insns (unsigned HOST_WIDE_INT, unsigned HOST_WIDE_INT);
2968 static int
2969 const64_is_2insns (unsigned HOST_WIDE_INT high_bits,
2970 unsigned HOST_WIDE_INT low_bits)
2972 int highest_bit_set, lowest_bit_set, all_bits_between_are_set;
2974 if (high_bits == 0
2975 || high_bits == 0xffffffff)
2976 return 1;
2978 analyze_64bit_constant (high_bits, low_bits,
2979 &highest_bit_set, &lowest_bit_set,
2980 &all_bits_between_are_set);
2982 if ((highest_bit_set == 63
2983 || lowest_bit_set == 0)
2984 && all_bits_between_are_set != 0)
2985 return 1;
2987 if ((highest_bit_set - lowest_bit_set) < 21)
2988 return 1;
2990 return 0;
2993 static unsigned HOST_WIDE_INT create_simple_focus_bits (unsigned HOST_WIDE_INT,
2994 unsigned HOST_WIDE_INT,
2995 int, int);
2997 static unsigned HOST_WIDE_INT
2998 create_simple_focus_bits (unsigned HOST_WIDE_INT high_bits,
2999 unsigned HOST_WIDE_INT low_bits,
3000 int lowest_bit_set, int shift)
3002 HOST_WIDE_INT hi, lo;
3004 if (lowest_bit_set < 32)
3006 lo = (low_bits >> lowest_bit_set) << shift;
3007 hi = ((high_bits << (32 - lowest_bit_set)) << shift);
3009 else
3011 lo = 0;
3012 hi = ((high_bits >> (lowest_bit_set - 32)) << shift);
3014 gcc_assert (! (hi & lo));
3015 return (hi | lo);
3018 /* Here we are sure to be arch64 and this is an integer constant
3019 being loaded into a register. Emit the most efficient
3020 insn sequence possible. Detection of all the 1-insn cases
3021 has been done already. */
3022 static void
3023 sparc_emit_set_const64 (rtx op0, rtx op1)
3025 unsigned HOST_WIDE_INT high_bits, low_bits;
3026 int lowest_bit_set, highest_bit_set;
3027 int all_bits_between_are_set;
3028 rtx temp = 0;
3030 /* Sanity check that we know what we are working with. */
3031 gcc_assert (TARGET_ARCH64
3032 && (GET_CODE (op0) == SUBREG
3033 || (REG_P (op0) && ! SPARC_FP_REG_P (REGNO (op0)))));
3035 if (! can_create_pseudo_p ())
3036 temp = op0;
3038 if (GET_CODE (op1) != CONST_INT)
3040 sparc_emit_set_symbolic_const64 (op0, op1, temp);
3041 return;
3044 if (! temp)
3045 temp = gen_reg_rtx (DImode);
3047 high_bits = ((INTVAL (op1) >> 32) & 0xffffffff);
3048 low_bits = (INTVAL (op1) & 0xffffffff);
3050 /* low_bits bits 0 --> 31
3051 high_bits bits 32 --> 63 */
3053 analyze_64bit_constant (high_bits, low_bits,
3054 &highest_bit_set, &lowest_bit_set,
3055 &all_bits_between_are_set);
3057 /* First try for a 2-insn sequence. */
3059 /* These situations are preferred because the optimizer can
3060 * do more things with them:
3061 * 1) mov -1, %reg
3062 * sllx %reg, shift, %reg
3063 * 2) mov -1, %reg
3064 * srlx %reg, shift, %reg
3065 * 3) mov some_small_const, %reg
3066 * sllx %reg, shift, %reg
3068 if (((highest_bit_set == 63
3069 || lowest_bit_set == 0)
3070 && all_bits_between_are_set != 0)
3071 || ((highest_bit_set - lowest_bit_set) < 12))
3073 HOST_WIDE_INT the_const = -1;
3074 int shift = lowest_bit_set;
3076 if ((highest_bit_set != 63
3077 && lowest_bit_set != 0)
3078 || all_bits_between_are_set == 0)
3080 the_const =
3081 create_simple_focus_bits (high_bits, low_bits,
3082 lowest_bit_set, 0);
3084 else if (lowest_bit_set == 0)
3085 shift = -(63 - highest_bit_set);
3087 gcc_assert (SPARC_SIMM13_P (the_const));
3088 gcc_assert (shift != 0);
3090 emit_insn (gen_safe_SET64 (temp, the_const));
3091 if (shift > 0)
3092 emit_insn (gen_rtx_SET (op0, gen_rtx_ASHIFT (DImode, temp,
3093 GEN_INT (shift))));
3094 else if (shift < 0)
3095 emit_insn (gen_rtx_SET (op0, gen_rtx_LSHIFTRT (DImode, temp,
3096 GEN_INT (-shift))));
3097 return;
3100 /* Now a range of 22 or fewer bits set somewhere.
3101 * 1) sethi %hi(focus_bits), %reg
3102 * sllx %reg, shift, %reg
3103 * 2) sethi %hi(focus_bits), %reg
3104 * srlx %reg, shift, %reg
3106 if ((highest_bit_set - lowest_bit_set) < 21)
3108 unsigned HOST_WIDE_INT focus_bits =
3109 create_simple_focus_bits (high_bits, low_bits,
3110 lowest_bit_set, 10);
3112 gcc_assert (SPARC_SETHI_P (focus_bits));
3113 gcc_assert (lowest_bit_set != 10);
3115 emit_insn (gen_safe_HIGH64 (temp, focus_bits));
3117 /* If lowest_bit_set == 10 then a sethi alone could have done it. */
3118 if (lowest_bit_set < 10)
3119 emit_insn (gen_rtx_SET (op0,
3120 gen_rtx_LSHIFTRT (DImode, temp,
3121 GEN_INT (10 - lowest_bit_set))));
3122 else if (lowest_bit_set > 10)
3123 emit_insn (gen_rtx_SET (op0,
3124 gen_rtx_ASHIFT (DImode, temp,
3125 GEN_INT (lowest_bit_set - 10))));
3126 return;
3129 /* 1) sethi %hi(low_bits), %reg
3130 * or %reg, %lo(low_bits), %reg
3131 * 2) sethi %hi(~low_bits), %reg
3132 * xor %reg, %lo(-0x400 | (low_bits & 0x3ff)), %reg
3134 if (high_bits == 0
3135 || high_bits == 0xffffffff)
3137 sparc_emit_set_const64_quick1 (op0, temp, low_bits,
3138 (high_bits == 0xffffffff));
3139 return;
3142 /* Now, try 3-insn sequences. */
3144 /* 1) sethi %hi(high_bits), %reg
3145 * or %reg, %lo(high_bits), %reg
3146 * sllx %reg, 32, %reg
3148 if (low_bits == 0)
3150 sparc_emit_set_const64_quick2 (op0, temp, high_bits, 0, 32);
3151 return;
3154 /* We may be able to do something quick
3155 when the constant is negated, so try that. */
3156 if (const64_is_2insns ((~high_bits) & 0xffffffff,
3157 (~low_bits) & 0xfffffc00))
3159 /* NOTE: The trailing bits get XOR'd so we need the
3160 non-negated bits, not the negated ones. */
3161 unsigned HOST_WIDE_INT trailing_bits = low_bits & 0x3ff;
3163 if ((((~high_bits) & 0xffffffff) == 0
3164 && ((~low_bits) & 0x80000000) == 0)
3165 || (((~high_bits) & 0xffffffff) == 0xffffffff
3166 && ((~low_bits) & 0x80000000) != 0))
3168 unsigned HOST_WIDE_INT fast_int = (~low_bits & 0xffffffff);
3170 if ((SPARC_SETHI_P (fast_int)
3171 && (~high_bits & 0xffffffff) == 0)
3172 || SPARC_SIMM13_P (fast_int))
3173 emit_insn (gen_safe_SET64 (temp, fast_int));
3174 else
3175 sparc_emit_set_const64 (temp, GEN_INT (fast_int));
3177 else
3179 rtx negated_const;
3180 negated_const = GEN_INT (((~low_bits) & 0xfffffc00) |
3181 (((HOST_WIDE_INT)((~high_bits) & 0xffffffff))<<32));
3182 sparc_emit_set_const64 (temp, negated_const);
3185 /* If we are XOR'ing with -1, then we should emit a one's complement
3186 instead. This way the combiner will notice logical operations
3187 such as ANDN later on and substitute. */
3188 if (trailing_bits == 0x3ff)
3190 emit_insn (gen_rtx_SET (op0, gen_rtx_NOT (DImode, temp)));
3192 else
3194 emit_insn (gen_rtx_SET (op0,
3195 gen_safe_XOR64 (temp,
3196 (-0x400 | trailing_bits))));
3198 return;
3201 /* 1) sethi %hi(xxx), %reg
3202 * or %reg, %lo(xxx), %reg
3203 * sllx %reg, yyy, %reg
3205 * ??? This is just a generalized version of the low_bits==0
3206 * thing above, FIXME...
3208 if ((highest_bit_set - lowest_bit_set) < 32)
3210 unsigned HOST_WIDE_INT focus_bits =
3211 create_simple_focus_bits (high_bits, low_bits,
3212 lowest_bit_set, 0);
3214 /* We can't get here in this state. */
3215 gcc_assert (highest_bit_set >= 32 && lowest_bit_set < 32);
3217 /* So what we know is that the set bits straddle the
3218 middle of the 64-bit word. */
3219 sparc_emit_set_const64_quick2 (op0, temp,
3220 focus_bits, 0,
3221 lowest_bit_set);
3222 return;
3225 /* 1) sethi %hi(high_bits), %reg
3226 * or %reg, %lo(high_bits), %reg
3227 * sllx %reg, 32, %reg
3228 * or %reg, low_bits, %reg
3230 if (SPARC_SIMM13_P (low_bits) && ((int)low_bits > 0))
3232 sparc_emit_set_const64_quick2 (op0, temp, high_bits, low_bits, 32);
3233 return;
3236 /* The easiest way, when all else fails, is full decomposition. */
3237 sparc_emit_set_const64_longway (op0, temp, high_bits, low_bits);
3240 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. */
3242 static bool
3243 sparc_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
3245 *p1 = SPARC_ICC_REG;
3246 *p2 = SPARC_FCC_REG;
3247 return true;
3250 /* Implement TARGET_MIN_ARITHMETIC_PRECISION. */
3252 static unsigned int
3253 sparc_min_arithmetic_precision (void)
3255 return 32;
3258 /* Given a comparison code (EQ, NE, etc.) and the first operand of a COMPARE,
3259 return the mode to be used for the comparison. For floating-point,
3260 CCFP[E]mode is used. CCNZmode should be used when the first operand
3261 is a PLUS, MINUS, NEG, or ASHIFT. CCmode should be used when no special
3262 processing is needed. */
3264 machine_mode
3265 select_cc_mode (enum rtx_code op, rtx x, rtx y)
3267 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3269 switch (op)
3271 case EQ:
3272 case NE:
3273 case UNORDERED:
3274 case ORDERED:
3275 case UNLT:
3276 case UNLE:
3277 case UNGT:
3278 case UNGE:
3279 case UNEQ:
3280 return CCFPmode;
3282 case LT:
3283 case LE:
3284 case GT:
3285 case GE:
3286 case LTGT:
3287 return CCFPEmode;
3289 default:
3290 gcc_unreachable ();
3293 else if ((GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
3294 || GET_CODE (x) == NEG || GET_CODE (x) == ASHIFT)
3295 && y == const0_rtx)
3297 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3298 return CCXNZmode;
3299 else
3300 return CCNZmode;
3302 else
3304 /* This is for the cmp<mode>_sne pattern. */
3305 if (GET_CODE (x) == NOT && y == constm1_rtx)
3307 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3308 return CCXCmode;
3309 else
3310 return CCCmode;
3313 /* This is for the [u]addvdi4_sp32 and [u]subvdi4_sp32 patterns. */
3314 if (!TARGET_ARCH64 && GET_MODE (x) == DImode)
3316 if (GET_CODE (y) == UNSPEC
3317 && (XINT (y, 1) == UNSPEC_ADDV
3318 || XINT (y, 1) == UNSPEC_SUBV
3319 || XINT (y, 1) == UNSPEC_NEGV))
3320 return CCVmode;
3321 else
3322 return CCCmode;
3325 if (TARGET_ARCH64 && GET_MODE (x) == DImode)
3326 return CCXmode;
3327 else
3328 return CCmode;
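/* For example, comparing (plus:SI a b) against zero selects CCNZmode
   (CCXNZmode for DImode on 64-bit), while an ordinary SImode register
   comparison selects plain CCmode.  */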
3332 /* Emit the compare insn and return the CC reg for a CODE comparison
3333 with operands X and Y. */
3335 static rtx
3336 gen_compare_reg_1 (enum rtx_code code, rtx x, rtx y)
3338 machine_mode mode;
3339 rtx cc_reg;
3341 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
3342 return x;
3344 mode = SELECT_CC_MODE (code, x, y);
3346 /* ??? We don't have movcc patterns so we cannot generate pseudo regs for the
3347 fcc regs (cse can't tell they're really call clobbered regs and will
3348 remove a duplicate comparison even if there is an intervening function
3349 call - it will then try to reload the cc reg via an int reg which is why
3350 we need the movcc patterns). It is possible to provide the movcc
3351 patterns by using the ldxfsr/stxfsr v9 insns. I tried it: you need two
3352 registers (say %g1,%g5) and it takes about 6 insns. A better fix would be
3353 to tell cse that CCFPE mode registers (even pseudos) are call
3354 clobbered. */
3356 /* ??? This is an experiment. Rather than making changes to cse which may
3357 or may not be easy/clean, we do our own cse. This is possible because
3358 we will generate hard registers. Cse knows they're call clobbered (it
3359 doesn't know the same thing about pseudos). If we guess wrong, no big
3360 deal, but if we win, great! */
3362 if (TARGET_V9 && GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3363 #if 1 /* experiment */
3365 int reg;
3366 /* We cycle through the registers to ensure they're all exercised. */
3367 static int next_fcc_reg = 0;
3368 /* Previous x,y for each fcc reg. */
3369 static rtx prev_args[4][2];
3371 /* Scan prev_args for x,y. */
3372 for (reg = 0; reg < 4; reg++)
3373 if (prev_args[reg][0] == x && prev_args[reg][1] == y)
3374 break;
3375 if (reg == 4)
3377 reg = next_fcc_reg;
3378 prev_args[reg][0] = x;
3379 prev_args[reg][1] = y;
3380 next_fcc_reg = (next_fcc_reg + 1) & 3;
3382 cc_reg = gen_rtx_REG (mode, reg + SPARC_FIRST_V9_FCC_REG);
3384 #else
3385 cc_reg = gen_reg_rtx (mode);
3386 #endif /* ! experiment */
3387 else if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
3388 cc_reg = gen_rtx_REG (mode, SPARC_FCC_REG);
3389 else
3390 cc_reg = gen_rtx_REG (mode, SPARC_ICC_REG);
3392 /* We shouldn't get here for TFmode if !TARGET_HARD_QUAD. If we do, this
3393 will only result in an unrecognizable insn so no point in asserting. */
3394 emit_insn (gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y)));
3396 return cc_reg;
3400 /* Emit the compare insn and return the CC reg for the comparison in CMP. */
3402 rtx
3403 gen_compare_reg (rtx cmp)
3405 return gen_compare_reg_1 (GET_CODE (cmp), XEXP (cmp, 0), XEXP (cmp, 1));
3408 /* This function is used for v9 only.
3409 DEST is the target of the Scc insn.
3410 CODE is the code for an Scc's comparison.
3411 X and Y are the values we compare.
3413 This function is needed to turn
3415 (set (reg:SI 110)
3416 (gt (reg:CCX 100 %icc)
3417 (const_int 0)))
3418 into
3419 (set (reg:SI 110)
3420 (gt:DI (reg:CCX 100 %icc)
3421 (const_int 0)))
3423 I.e., the instruction recognizer needs to see the mode of the comparison to
3424 find the right instruction. We could use "gt:DI" right in the
3425 define_expand, but leaving it out allows us to handle DI, SI, etc. */
3427 static int
3428 gen_v9_scc (rtx dest, enum rtx_code compare_code, rtx x, rtx y)
3430 if (! TARGET_ARCH64
3431 && (GET_MODE (x) == DImode
3432 || GET_MODE (dest) == DImode))
3433 return 0;
3435 /* Try to use the movrCC insns. */
3436 if (TARGET_ARCH64
3437 && GET_MODE_CLASS (GET_MODE (x)) == MODE_INT
3438 && y == const0_rtx
3439 && v9_regcmp_p (compare_code))
3441 rtx op0 = x;
3442 rtx temp;
3444 /* Special case for op0 != 0. This can be done with one instruction if
3445 dest == x. */
3447 if (compare_code == NE
3448 && GET_MODE (dest) == DImode
3449 && rtx_equal_p (op0, dest))
3451 emit_insn (gen_rtx_SET (dest,
3452 gen_rtx_IF_THEN_ELSE (DImode,
3453 gen_rtx_fmt_ee (compare_code, DImode,
3454 op0, const0_rtx),
3455 const1_rtx,
3456 dest)));
3457 return 1;
3460 if (reg_overlap_mentioned_p (dest, op0))
3462 /* Handle the case where dest == x.
3463 We "early clobber" the result. */
3464 op0 = gen_reg_rtx (GET_MODE (x));
3465 emit_move_insn (op0, x);
3468 emit_insn (gen_rtx_SET (dest, const0_rtx));
3469 if (GET_MODE (op0) != DImode)
3471 temp = gen_reg_rtx (DImode);
3472 convert_move (temp, op0, 0);
3474 else
3475 temp = op0;
3476 emit_insn (gen_rtx_SET (dest,
3477 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3478 gen_rtx_fmt_ee (compare_code, DImode,
3479 temp, const0_rtx),
3480 const1_rtx,
3481 dest)));
3482 return 1;
3484 else
3486 x = gen_compare_reg_1 (compare_code, x, y);
3487 y = const0_rtx;
3489 emit_insn (gen_rtx_SET (dest, const0_rtx));
3490 emit_insn (gen_rtx_SET (dest,
3491 gen_rtx_IF_THEN_ELSE (GET_MODE (dest),
3492 gen_rtx_fmt_ee (compare_code,
3493 GET_MODE (x), x, y),
3494 const1_rtx, dest)));
3495 return 1;
3500 /* Emit an scc insn. For seq, sne, sgeu, and sltu, we can do this
3501 without jumps using the addx/subx instructions. */
3503 bool
3504 emit_scc_insn (rtx operands[])
3506 rtx tem, x, y;
3507 enum rtx_code code;
3508 machine_mode mode;
3510 /* The quad-word fp compare library routines all return nonzero to indicate
3511 true, which is different from the equivalent libgcc routines, so we must
3512 handle them specially here. */
3513 if (GET_MODE (operands[2]) == TFmode && ! TARGET_HARD_QUAD)
3515 operands[1] = sparc_emit_float_lib_cmp (operands[2], operands[3],
3516 GET_CODE (operands[1]));
3517 operands[2] = XEXP (operands[1], 0);
3518 operands[3] = XEXP (operands[1], 1);
3521 code = GET_CODE (operands[1]);
3522 x = operands[2];
3523 y = operands[3];
3524 mode = GET_MODE (x);
3526 /* For seq/sne on v9 we use the same code as v8 (the addx/subx method has
3527 more applications). The exception to this is "reg != 0" which can
3528 be done in one instruction on v9 (so we do it). */
3529 if ((code == EQ || code == NE) && (mode == SImode || mode == DImode))
3531 if (y != const0_rtx)
3532 x = force_reg (mode, gen_rtx_XOR (mode, x, y));
3534 rtx pat = gen_rtx_SET (operands[0],
3535 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3536 x, const0_rtx));
3538 /* If we can use addx/subx or addxc, add a clobber for CC. */
3539 if (mode == SImode || (code == NE && TARGET_VIS3))
3541 rtx clobber
3542 = gen_rtx_CLOBBER (VOIDmode,
3543 gen_rtx_REG (mode == SImode ? CCmode : CCXmode,
3544 SPARC_ICC_REG));
3545 pat = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, pat, clobber));
3548 emit_insn (pat);
3549 return true;
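/* The insn emitted above is later matched or split by the backend
   patterns into an addx/subx sequence; e.g. an SImode "x != y"
   conventionally becomes something like

     xor    %x, %y, %t
     subcc  %g0, %t, %g0      ! carry set iff %t != 0
     addx   %g0, 0, %dest     ! dest = carry

   (a sketch of the classic idiom, not the literal compiler output).  */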
3552 /* We can do LTU in DImode using the addxc instruction with VIS3. */
3553 if (TARGET_ARCH64
3554 && mode == DImode
3555 && !((code == LTU || code == GTU) && TARGET_VIS3)
3556 && gen_v9_scc (operands[0], code, x, y))
3557 return true;
3559 /* We can do LTU and GEU using the addx/subx instructions too. And
3560 for GTU/LEU, if both operands are registers, swap them and fall
3561 back to the easy case. */
3562 if (code == GTU || code == LEU)
3564 if ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
3565 && (GET_CODE (y) == REG || GET_CODE (y) == SUBREG))
3567 tem = x;
3568 x = y;
3569 y = tem;
3570 code = swap_condition (code);
3574 if (code == LTU || code == GEU)
3576 emit_insn (gen_rtx_SET (operands[0],
3577 gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3578 gen_compare_reg_1 (code, x, y),
3579 const0_rtx)));
3580 return true;
3583 /* All the possibilities to use addx/subx-based sequences have been
3584 exhausted; try for a 3-instruction sequence using v9 conditional
3585 moves. */
3586 if (TARGET_V9 && gen_v9_scc (operands[0], code, x, y))
3587 return true;
3589 /* Nope, do branches. */
3590 return false;
3593 /* Emit a conditional jump insn for the v9 architecture using comparison code
3594 CODE and jump target LABEL.
3595 This function exists to take advantage of the v9 brxx insns. */
3597 static void
3598 emit_v9_brxx_insn (enum rtx_code code, rtx op0, rtx label)
3600 emit_jump_insn (gen_rtx_SET (pc_rtx,
3601 gen_rtx_IF_THEN_ELSE (VOIDmode,
3602 gen_rtx_fmt_ee (code, GET_MODE (op0),
3603 op0, const0_rtx),
3604 gen_rtx_LABEL_REF (VOIDmode, label),
3605 pc_rtx)));
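/* This yields branch-on-register-contents insns such as

     brnz,pt %o0, .Llabel

   for a DImode "op0 != 0" test (mnemonic shown for illustration).  */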
3608 /* Emit a conditional jump insn for the UA2011 architecture using
3609 comparison code CODE and jump target LABEL. This function exists
3610 to take advantage of the UA2011 Compare and Branch insns. */
3612 static void
3613 emit_cbcond_insn (enum rtx_code code, rtx op0, rtx op1, rtx label)
3615 rtx if_then_else;
3617 if_then_else = gen_rtx_IF_THEN_ELSE (VOIDmode,
3618 gen_rtx_fmt_ee (code, GET_MODE (op0),
3619 op0, op1),
3620 gen_rtx_LABEL_REF (VOIDmode, label),
3621 pc_rtx);
3623 emit_jump_insn (gen_rtx_SET (pc_rtx, if_then_else));
3626 void
3627 emit_conditional_branch_insn (rtx operands[])
3629 /* The quad-word fp compare library routines all return nonzero to indicate
3630 true, which is different from the equivalent libgcc routines, so we must
3631 handle them specially here. */
3632 if (GET_MODE (operands[1]) == TFmode && ! TARGET_HARD_QUAD)
3634 operands[0] = sparc_emit_float_lib_cmp (operands[1], operands[2],
3635 GET_CODE (operands[0]));
3636 operands[1] = XEXP (operands[0], 0);
3637 operands[2] = XEXP (operands[0], 1);
3640 /* If we can tell early on that the comparison is against a constant
3641 that won't fit in the 5-bit signed immediate field of a cbcond (-16..15),
3642 use one of the other v9 conditional branch sequences. */
3643 if (TARGET_CBCOND
3644 && GET_CODE (operands[1]) == REG
3645 && (GET_MODE (operands[1]) == SImode
3646 || (TARGET_ARCH64 && GET_MODE (operands[1]) == DImode))
3647 && (GET_CODE (operands[2]) != CONST_INT
3648 || SPARC_SIMM5_P (INTVAL (operands[2]))))
3650 emit_cbcond_insn (GET_CODE (operands[0]), operands[1], operands[2], operands[3]);
3651 return;
3654 if (TARGET_ARCH64 && operands[2] == const0_rtx
3655 && GET_CODE (operands[1]) == REG
3656 && GET_MODE (operands[1]) == DImode)
3658 emit_v9_brxx_insn (GET_CODE (operands[0]), operands[1], operands[3]);
3659 return;
3662 operands[1] = gen_compare_reg (operands[0]);
3663 operands[2] = const0_rtx;
3664 operands[0] = gen_rtx_fmt_ee (GET_CODE (operands[0]), VOIDmode,
3665 operands[1], operands[2]);
3666 emit_jump_insn (gen_cbranchcc4 (operands[0], operands[1], operands[2],
3667 operands[3]));
3671 /* Generate a DFmode part of a hard TFmode register.
3672 REG is the TFmode hard register, LOW is 1 for the
3673 low 64 bits of the register and 0 otherwise. */
3675 rtx
3676 gen_df_reg (rtx reg, int low)
3678 int regno = REGNO (reg);
3680 if ((WORDS_BIG_ENDIAN == 0) ^ (low != 0))
3681 regno += (TARGET_ARCH64 && SPARC_INT_REG_P (regno)) ? 1 : 2;
3682 return gen_rtx_REG (DFmode, regno);
3685 /* Generate a call to FUNC with OPERANDS. Operand 0 is the return value.
3686 Unlike normal calls, TFmode operands are passed by reference. It is
3687 assumed that no more than 3 operands are required. */
3689 static void
3690 emit_soft_tfmode_libcall (const char *func_name, int nargs, rtx *operands)
3692 rtx ret_slot = NULL, arg[3], func_sym;
3693 int i;
3695 /* We only expect to be called for conversions, unary, and binary ops. */
3696 gcc_assert (nargs == 2 || nargs == 3);
3698 for (i = 0; i < nargs; ++i)
3700 rtx this_arg = operands[i];
3701 rtx this_slot;
3703 /* TFmode arguments and return values are passed by reference. */
3704 if (GET_MODE (this_arg) == TFmode)
3706 int force_stack_temp;
3708 force_stack_temp = 0;
3709 if (TARGET_BUGGY_QP_LIB && i == 0)
3710 force_stack_temp = 1;
3712 if (GET_CODE (this_arg) == MEM
3713 && ! force_stack_temp)
3715 tree expr = MEM_EXPR (this_arg);
3716 if (expr)
3717 mark_addressable (expr);
3718 this_arg = XEXP (this_arg, 0);
3720 else if (CONSTANT_P (this_arg)
3721 && ! force_stack_temp)
3723 this_slot = force_const_mem (TFmode, this_arg);
3724 this_arg = XEXP (this_slot, 0);
3726 else
3728 this_slot = assign_stack_temp (TFmode, GET_MODE_SIZE (TFmode));
3730 /* Operand 0 is the return value. We'll copy it out later. */
3731 if (i > 0)
3732 emit_move_insn (this_slot, this_arg);
3733 else
3734 ret_slot = this_slot;
3736 this_arg = XEXP (this_slot, 0);
3740 arg[i] = this_arg;
3743 func_sym = gen_rtx_SYMBOL_REF (Pmode, func_name);
3745 if (GET_MODE (operands[0]) == TFmode)
3747 if (nargs == 2)
3748 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3749 arg[0], GET_MODE (arg[0]),
3750 arg[1], GET_MODE (arg[1]));
3751 else
3752 emit_library_call (func_sym, LCT_NORMAL, VOIDmode,
3753 arg[0], GET_MODE (arg[0]),
3754 arg[1], GET_MODE (arg[1]),
3755 arg[2], GET_MODE (arg[2]));
3757 if (ret_slot)
3758 emit_move_insn (operands[0], ret_slot);
3760 else
3762 rtx ret;
3764 gcc_assert (nargs == 2);
3766 ret = emit_library_call_value (func_sym, operands[0], LCT_NORMAL,
3767 GET_MODE (operands[0]),
3768 arg[1], GET_MODE (arg[1]));
3770 if (ret != operands[0])
3771 emit_move_insn (operands[0], ret);
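/* Under this convention a TFmode addition is in effect emitted as the
   call _Qp_add (&result, &x, &y): the result and both operands are
   passed by reference, with the stack temporary RET_SLOT above
   standing in for &result.  */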
3775 /* Expand soft-float TFmode calls to sparc abi routines. */
3777 static void
3778 emit_soft_tfmode_binop (enum rtx_code code, rtx *operands)
3780 const char *func;
3782 switch (code)
3784 case PLUS:
3785 func = "_Qp_add";
3786 break;
3787 case MINUS:
3788 func = "_Qp_sub";
3789 break;
3790 case MULT:
3791 func = "_Qp_mul";
3792 break;
3793 case DIV:
3794 func = "_Qp_div";
3795 break;
3796 default:
3797 gcc_unreachable ();
3800 emit_soft_tfmode_libcall (func, 3, operands);
3803 static void
3804 emit_soft_tfmode_unop (enum rtx_code code, rtx *operands)
3806 const char *func;
3808 gcc_assert (code == SQRT);
3809 func = "_Qp_sqrt";
3811 emit_soft_tfmode_libcall (func, 2, operands);
3814 static void
3815 emit_soft_tfmode_cvt (enum rtx_code code, rtx *operands)
3817 const char *func;
3819 switch (code)
3821 case FLOAT_EXTEND:
3822 switch (GET_MODE (operands[1]))
3824 case E_SFmode:
3825 func = "_Qp_stoq";
3826 break;
3827 case E_DFmode:
3828 func = "_Qp_dtoq";
3829 break;
3830 default:
3831 gcc_unreachable ();
3833 break;
3835 case FLOAT_TRUNCATE:
3836 switch (GET_MODE (operands[0]))
3838 case E_SFmode:
3839 func = "_Qp_qtos";
3840 break;
3841 case E_DFmode:
3842 func = "_Qp_qtod";
3843 break;
3844 default:
3845 gcc_unreachable ();
3847 break;
3849 case FLOAT:
3850 switch (GET_MODE (operands[1]))
3852 case E_SImode:
3853 func = "_Qp_itoq";
3854 if (TARGET_ARCH64)
3855 operands[1] = gen_rtx_SIGN_EXTEND (DImode, operands[1]);
3856 break;
3857 case E_DImode:
3858 func = "_Qp_xtoq";
3859 break;
3860 default:
3861 gcc_unreachable ();
3863 break;
3865 case UNSIGNED_FLOAT:
3866 switch (GET_MODE (operands[1]))
3868 case E_SImode:
3869 func = "_Qp_uitoq";
3870 if (TARGET_ARCH64)
3871 operands[1] = gen_rtx_ZERO_EXTEND (DImode, operands[1]);
3872 break;
3873 case E_DImode:
3874 func = "_Qp_uxtoq";
3875 break;
3876 default:
3877 gcc_unreachable ();
3879 break;
3881 case FIX:
3882 switch (GET_MODE (operands[0]))
3884 case E_SImode:
3885 func = "_Qp_qtoi";
3886 break;
3887 case E_DImode:
3888 func = "_Qp_qtox";
3889 break;
3890 default:
3891 gcc_unreachable ();
3893 break;
3895 case UNSIGNED_FIX:
3896 switch (GET_MODE (operands[0]))
3898 case E_SImode:
3899 func = "_Qp_qtoui";
3900 break;
3901 case E_DImode:
3902 func = "_Qp_qtoux";
3903 break;
3904 default:
3905 gcc_unreachable ();
3907 break;
3909 default:
3910 gcc_unreachable ();
3913 emit_soft_tfmode_libcall (func, 2, operands);
3916 /* Expand a hard-float tfmode operation. All arguments must be in
3917 registers. */
3919 static void
3920 emit_hard_tfmode_operation (enum rtx_code code, rtx *operands)
3922 rtx op, dest;
3924 if (GET_RTX_CLASS (code) == RTX_UNARY)
3926 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3927 op = gen_rtx_fmt_e (code, GET_MODE (operands[0]), operands[1]);
3929 else
3931 operands[1] = force_reg (GET_MODE (operands[1]), operands[1]);
3932 operands[2] = force_reg (GET_MODE (operands[2]), operands[2]);
3933 op = gen_rtx_fmt_ee (code, GET_MODE (operands[0]),
3934 operands[1], operands[2]);
3937 if (register_operand (operands[0], VOIDmode))
3938 dest = operands[0];
3939 else
3940 dest = gen_reg_rtx (GET_MODE (operands[0]));
3942 emit_insn (gen_rtx_SET (dest, op));
3944 if (dest != operands[0])
3945 emit_move_insn (operands[0], dest);
3948 void
3949 emit_tfmode_binop (enum rtx_code code, rtx *operands)
3951 if (TARGET_HARD_QUAD)
3952 emit_hard_tfmode_operation (code, operands);
3953 else
3954 emit_soft_tfmode_binop (code, operands);
3957 void
3958 emit_tfmode_unop (enum rtx_code code, rtx *operands)
3960 if (TARGET_HARD_QUAD)
3961 emit_hard_tfmode_operation (code, operands);
3962 else
3963 emit_soft_tfmode_unop (code, operands);
3966 void
3967 emit_tfmode_cvt (enum rtx_code code, rtx *operands)
3969 if (TARGET_HARD_QUAD)
3970 emit_hard_tfmode_operation (code, operands);
3971 else
3972 emit_soft_tfmode_cvt (code, operands);
3975 /* Return nonzero if a branch/jump/call instruction will be emitting
3976 a nop into its delay slot. */
3978 int
3979 empty_delay_slot (rtx_insn *insn)
3981 rtx seq;
3983 /* If there is no previous instruction (should not happen), return true. */
3984 if (PREV_INSN (insn) == NULL)
3985 return 1;
3987 seq = NEXT_INSN (PREV_INSN (insn));
3988 if (GET_CODE (PATTERN (seq)) == SEQUENCE)
3989 return 0;
3991 return 1;
3994 /* Return nonzero if we should emit a nop after a cbcond instruction.
3995 The cbcond instruction does not have a delay slot; however, there is
3996 a severe performance penalty if a control transfer appears right
3997 after a cbcond. Therefore we emit a nop when we detect this
3998 situation. */
4000 int
4001 emit_cbcond_nop (rtx_insn *insn)
4003 rtx next = next_active_insn (insn);
4005 if (!next)
4006 return 1;
4008 if (NONJUMP_INSN_P (next)
4009 && GET_CODE (PATTERN (next)) == SEQUENCE)
4010 next = XVECEXP (PATTERN (next), 0, 0);
4011 else if (CALL_P (next)
4012 && GET_CODE (PATTERN (next)) == PARALLEL)
4014 rtx delay = XVECEXP (PATTERN (next), 0, 1);
4016 if (GET_CODE (delay) == RETURN)
4018 /* It's a sibling call. Do not emit the nop if we're going
4019 to emit something other than the jump itself as the first
4020 instruction of the sibcall sequence. */
4021 if (sparc_leaf_function_p || TARGET_FLAT)
4022 return 0;
4026 if (NONJUMP_INSN_P (next))
4027 return 0;
4029 return 1;
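/* For instance, if a compare-and-branch is immediately followed by
   another control transfer, the output becomes something like

	cwbne	%o1, 5, .L2
	nop
	ba	.L3

   rather than letting the second transfer directly follow the cbcond.  */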
4032 /* Return nonzero if TRIAL, an insn, can be combined with a 'restore'
4033 instruction. RETURN_P is true if the v9 variant 'return' is to be
4034 considered in the test too.
4036 TRIAL must be a SET whose destination is a REG appropriate for the
4037 'restore' instruction or, if RETURN_P is true, for the 'return'
4038 instruction. */
4040 static int
4041 eligible_for_restore_insn (rtx trial, bool return_p)
4043 rtx pat = PATTERN (trial);
4044 rtx src = SET_SRC (pat);
4045 bool src_is_freg = false;
4046 rtx src_reg;
4048 /* Since we now can do moves between float and integer registers when
4049 VIS3 is enabled, we have to catch this case. We can allow such
4050 moves when doing a 'return' however. */
4051 src_reg = src;
4052 if (GET_CODE (src_reg) == SUBREG)
4053 src_reg = SUBREG_REG (src_reg);
4054 if (GET_CODE (src_reg) == REG
4055 && SPARC_FP_REG_P (REGNO (src_reg)))
4056 src_is_freg = true;
4058 /* The 'restore src,%g0,dest' pattern for word mode and below. */
4059 if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4060 && arith_operand (src, GET_MODE (src))
4061 && ! src_is_freg)
4063 if (TARGET_ARCH64)
4064 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4065 else
4066 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (SImode);
4069 /* The 'restore src,%g0,dest' pattern for double-word mode. */
4070 else if (GET_MODE_CLASS (GET_MODE (src)) != MODE_FLOAT
4071 && arith_double_operand (src, GET_MODE (src))
4072 && ! src_is_freg)
4073 return GET_MODE_SIZE (GET_MODE (src)) <= GET_MODE_SIZE (DImode);
4075 /* The 'restore src,%g0,dest' pattern for float if no FPU. */
4076 else if (! TARGET_FPU && register_operand (src, SFmode))
4077 return 1;
4079 /* The 'restore src,%g0,dest' pattern for double if no FPU. */
4080 else if (! TARGET_FPU && TARGET_ARCH64 && register_operand (src, DFmode))
4081 return 1;
4083 /* If we have the 'return' instruction, anything that does not use
4084 local or output registers and can go into a delay slot wins. */
4085 else if (return_p && TARGET_V9 && !epilogue_renumber (&pat, 1))
4086 return 1;
4088 /* The 'restore src1,src2,dest' pattern for SImode. */
4089 else if (GET_CODE (src) == PLUS
4090 && register_operand (XEXP (src, 0), SImode)
4091 && arith_operand (XEXP (src, 1), SImode))
4092 return 1;
4094 /* The 'restore src1,src2,dest' pattern for DImode. */
4095 else if (GET_CODE (src) == PLUS
4096 && register_operand (XEXP (src, 0), DImode)
4097 && arith_double_operand (XEXP (src, 1), DImode))
4098 return 1;
4100 /* The 'restore src1,%lo(src2),dest' pattern. */
4101 else if (GET_CODE (src) == LO_SUM
4102 && ! TARGET_CM_MEDMID
4103 && ((register_operand (XEXP (src, 0), SImode)
4104 && immediate_operand (XEXP (src, 1), SImode))
4105 || (TARGET_ARCH64
4106 && register_operand (XEXP (src, 0), DImode)
4107 && immediate_operand (XEXP (src, 1), DImode))))
4108 return 1;
4110 /* The 'restore src,src,dest' pattern. */
4111 else if (GET_CODE (src) == ASHIFT
4112 && (register_operand (XEXP (src, 0), SImode)
4113 || register_operand (XEXP (src, 0), DImode))
4114 && XEXP (src, 1) == const1_rtx)
4115 return 1;
4117 return 0;
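/* This is what typically allows a function ending in "return x + y;"
   to finish with the single instruction "restore %i0, %i1, %o0",
   folding the final addition into the register window restore.  */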
4120 /* Return nonzero if TRIAL can go into the function return's delay slot. */
4122 int
4123 eligible_for_return_delay (rtx_insn *trial)
4125 int regno;
4126 rtx pat;
4128 /* If the function uses __builtin_eh_return, the eh_return machinery
4129 occupies the delay slot. */
4130 if (crtl->calls_eh_return)
4131 return 0;
4133 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4134 return 0;
4136 /* In the case of a leaf or flat function, anything can go into the slot. */
4137 if (sparc_leaf_function_p || TARGET_FLAT)
4138 return 1;
4140 if (!NONJUMP_INSN_P (trial))
4141 return 0;
4143 pat = PATTERN (trial);
4144 if (GET_CODE (pat) == PARALLEL)
4146 int i;
4148 if (! TARGET_V9)
4149 return 0;
4150 for (i = XVECLEN (pat, 0) - 1; i >= 0; i--)
4152 rtx expr = XVECEXP (pat, 0, i);
4153 if (GET_CODE (expr) != SET)
4154 return 0;
4155 if (GET_CODE (SET_DEST (expr)) != REG)
4156 return 0;
4157 regno = REGNO (SET_DEST (expr));
4158 if (regno >= 8 && regno < 24)
4159 return 0;
4161 return !epilogue_renumber (&pat, 1);
4164 if (GET_CODE (pat) != SET)
4165 return 0;
4167 if (GET_CODE (SET_DEST (pat)) != REG)
4168 return 0;
4170 regno = REGNO (SET_DEST (pat));
4172 /* Otherwise, only operations which can be done in tandem with
4173 a `restore' or `return' insn can go into the delay slot. */
4174 if (regno >= 8 && regno < 24)
4175 return 0;
4177 /* If this instruction sets up a floating point register and we have a return
4178 instruction, it can probably go in. But restore will not work
4179 with FP_REGS. */
4180 if (! SPARC_INT_REG_P (regno))
4181 return TARGET_V9 && !epilogue_renumber (&pat, 1);
4183 return eligible_for_restore_insn (trial, true);
4186 /* Return nonzero if TRIAL can go into the sibling call's delay slot. */
4188 int
4189 eligible_for_sibcall_delay (rtx_insn *trial)
4191 rtx pat;
4193 if (get_attr_in_branch_delay (trial) == IN_BRANCH_DELAY_FALSE)
4194 return 0;
4196 if (!NONJUMP_INSN_P (trial))
4197 return 0;
4199 pat = PATTERN (trial);
4201 if (sparc_leaf_function_p || TARGET_FLAT)
4203 /* If the tail call is done using the call instruction,
4204 we have to restore %o7 in the delay slot. */
4205 if (LEAF_SIBCALL_SLOT_RESERVED_P)
4206 return 0;
4208 /* %g1 is used to build the function address. */
4209 if (reg_mentioned_p (gen_rtx_REG (Pmode, 1), pat))
4210 return 0;
4212 return 1;
4215 if (GET_CODE (pat) != SET)
4216 return 0;
4218 /* Otherwise, only operations which can be done in tandem with
4219 a `restore' insn can go into the delay slot. */
4220 if (GET_CODE (SET_DEST (pat)) != REG
4221 || (REGNO (SET_DEST (pat)) >= 8 && REGNO (SET_DEST (pat)) < 24)
4222 || ! SPARC_INT_REG_P (REGNO (SET_DEST (pat))))
4223 return 0;
4225 /* If it mentions %o7, it can't go in, because the sibcall will clobber it
4226 in most cases. */
4227 if (reg_mentioned_p (gen_rtx_REG (Pmode, 15), pat))
4228 return 0;
4230 return eligible_for_restore_insn (trial, false);
4233 /* Determine if it's legal to put X into the constant pool. This
4234 is not possible if X contains the address of a symbol that is
4235 not constant (TLS) or not known at final link time (PIC). */
4237 static bool
4238 sparc_cannot_force_const_mem (machine_mode mode, rtx x)
4240 switch (GET_CODE (x))
4242 case CONST_INT:
4243 case CONST_WIDE_INT:
4244 case CONST_DOUBLE:
4245 case CONST_VECTOR:
4246 /* Accept all non-symbolic constants. */
4247 return false;
4249 case LABEL_REF:
4250 /* Labels are OK iff we are non-PIC. */
4251 return flag_pic != 0;
4253 case SYMBOL_REF:
4254 /* 'Naked' TLS symbol references are never OK;
4255 non-TLS symbols are OK iff we are non-PIC. */
4256 if (SYMBOL_REF_TLS_MODEL (x))
4257 return true;
4258 else
4259 return flag_pic != 0;
4261 case CONST:
4262 return sparc_cannot_force_const_mem (mode, XEXP (x, 0));
4263 case PLUS:
4264 case MINUS:
4265 return sparc_cannot_force_const_mem (mode, XEXP (x, 0))
4266 || sparc_cannot_force_const_mem (mode, XEXP (x, 1));
4267 case UNSPEC:
4268 return true;
4269 default:
4270 gcc_unreachable ();
4274 /* Global Offset Table support. */
4275 static GTY(()) rtx got_symbol_rtx = NULL_RTX;
4276 static GTY(()) rtx got_register_rtx = NULL_RTX;
4277 static GTY(()) rtx got_helper_rtx = NULL_RTX;
4279 static GTY(()) bool got_helper_needed = false;
4281 /* Return the SYMBOL_REF for the Global Offset Table. */
4283 static rtx
4284 sparc_got (void)
4286 if (!got_symbol_rtx)
4287 got_symbol_rtx = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
4289 return got_symbol_rtx;
4292 /* Output the load_pcrel_sym pattern. */
4294 const char *
4295 output_load_pcrel_sym (rtx *operands)
4297 if (flag_delayed_branch)
4299 output_asm_insn ("sethi\t%%hi(%a1-4), %0", operands);
4300 output_asm_insn ("call\t%a2", operands);
4301 output_asm_insn (" add\t%0, %%lo(%a1+4), %0", operands);
4303 else
4305 output_asm_insn ("sethi\t%%hi(%a1-8), %0", operands);
4306 output_asm_insn ("add\t%0, %%lo(%a1-4), %0", operands);
4307 output_asm_insn ("call\t%a2", operands);
4308 output_asm_insn (" nop", NULL);
4311 if (operands[2] == got_helper_rtx)
4312 got_helper_needed = true;
4314 return "";
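/* With delayed branches this emits a sequence along the lines of

	sethi	%hi(_GLOBAL_OFFSET_TABLE_-4), %l7
	call	__sparc_get_pc_thunk.l7
	 add	%l7, %lo(_GLOBAL_OFFSET_TABLE_+4), %l7

   the thunk adds the address of the call (%o7) into %l7, and the -4/+4
   offsets account for the sethi and add sitting one instruction before
   and after the call.  */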
4317 #ifdef HAVE_GAS_HIDDEN
4318 # define USE_HIDDEN_LINKONCE 1
4319 #else
4320 # define USE_HIDDEN_LINKONCE 0
4321 #endif
4323 /* Emit code to load the GOT register. */
4325 void
4326 load_got_register (void)
4328 rtx insn;
4330 if (TARGET_VXWORKS_RTP)
4332 if (!got_register_rtx)
4333 got_register_rtx = pic_offset_table_rtx;
4335 insn = gen_vxworks_load_got ();
4337 else
4339 if (!got_register_rtx)
4340 got_register_rtx = gen_rtx_REG (Pmode, GLOBAL_OFFSET_TABLE_REGNUM);
4342 /* The GOT symbol is subject to a PC-relative relocation so we need a
4343 helper function to add the PC value and thus get the final value. */
4344 if (!got_helper_rtx)
4346 char name[32];
4348 /* Skip the leading '%' as that cannot be used in a symbol name. */
4349 if (USE_HIDDEN_LINKONCE)
4350 sprintf (name, "__sparc_get_pc_thunk.%s",
4351 reg_names[REGNO (got_register_rtx)] + 1);
4352 else
4353 ASM_GENERATE_INTERNAL_LABEL (name, "LADDPC",
4354 REGNO (got_register_rtx));
4356 got_helper_rtx = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
4359 /* The load_pcrel_sym{si,di} patterns require absolute addressing. */
4360 const int orig_flag_pic = flag_pic;
4361 flag_pic = 0;
4362 insn = gen_load_pcrel_sym (Pmode,
4363 got_register_rtx,
4364 sparc_got (),
4365 got_helper_rtx,
4366 GEN_INT (GLOBAL_OFFSET_TABLE_REGNUM));
4367 flag_pic = orig_flag_pic;
4370 emit_insn (insn);
4373 /* Ensure that we are not using patterns that are not OK with PIC. */
4375 int
4376 check_pic (int i)
4378 rtx op;
4380 switch (flag_pic)
4382 case 1:
4383 op = recog_data.operand[i];
4384 gcc_assert (GET_CODE (op) != SYMBOL_REF
4385 && (GET_CODE (op) != CONST
4386 || (GET_CODE (XEXP (op, 0)) == MINUS
4387 && XEXP (XEXP (op, 0), 0) == sparc_got ()
4388 && GET_CODE (XEXP (XEXP (op, 0), 1)) == CONST)));
4389 /* fallthrough */
4390 case 2:
4391 default:
4392 return 1;
4396 /* Return true if X is an address which needs a temporary register when
4397 reloaded while generating PIC code. */
4399 int
4400 pic_address_needs_scratch (rtx x)
4402 /* An address which is a symbol plus a non-SMALL_INT offset needs a temp reg. */
4403 if (GET_CODE (x) == CONST
4404 && GET_CODE (XEXP (x, 0)) == PLUS
4405 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
4406 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4407 && !SMALL_INT (XEXP (XEXP (x, 0), 1)))
4408 return 1;
4410 return 0;
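/* For example, (const (plus (symbol_ref "x") (const_int 8192))) needs
   a scratch register, since 8192 does not fit in the 13-bit signed
   immediate field, whereas an offset of 8 would not.  */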
4413 /* Determine if a given RTX is a valid constant. We already know this
4414 satisfies CONSTANT_P. */
4416 static bool
4417 sparc_legitimate_constant_p (machine_mode mode, rtx x)
4419 switch (GET_CODE (x))
4421 case CONST:
4422 case SYMBOL_REF:
4423 if (sparc_tls_referenced_p (x))
4424 return false;
4425 break;
4427 case CONST_DOUBLE:
4428 /* Floating point constants are generally not ok.
4429 The only exceptions are 0.0 and all-ones in VIS. */
4430 if (TARGET_VIS
4431 && SCALAR_FLOAT_MODE_P (mode)
4432 && (const_zero_operand (x, mode)
4433 || const_all_ones_operand (x, mode)))
4434 return true;
4436 return false;
4438 case CONST_VECTOR:
4439 /* Vector constants are generally not ok.
4440 The only exceptions are 0 and -1 in VIS. */
4441 if (TARGET_VIS
4442 && (const_zero_operand (x, mode)
4443 || const_all_ones_operand (x, mode)))
4444 return true;
4446 return false;
4448 default:
4449 break;
4452 return true;
4455 /* Determine if a given RTX is a valid constant address. */
4457 bool
4458 constant_address_p (rtx x)
4460 switch (GET_CODE (x))
4462 case LABEL_REF:
4463 case CONST_INT:
4464 case HIGH:
4465 return true;
4467 case CONST:
4468 if (flag_pic && pic_address_needs_scratch (x))
4469 return false;
4470 return sparc_legitimate_constant_p (Pmode, x);
4472 case SYMBOL_REF:
4473 return !flag_pic && sparc_legitimate_constant_p (Pmode, x);
4475 default:
4476 return false;
4480 /* Nonzero if the constant value X is a legitimate general operand
4481 when generating PIC code. It is given that flag_pic is on and
4482 that X satisfies CONSTANT_P. */
4484 bool
4485 legitimate_pic_operand_p (rtx x)
4487 if (pic_address_needs_scratch (x))
4488 return false;
4489 if (sparc_tls_referenced_p (x))
4490 return false;
4491 return true;
4494 /* Return true if X is a representation of the PIC register. */
4496 static bool
4497 sparc_pic_register_p (rtx x)
4499 if (!REG_P (x) || !pic_offset_table_rtx)
4500 return false;
4502 if (x == pic_offset_table_rtx)
4503 return true;
4505 if (!HARD_REGISTER_P (pic_offset_table_rtx)
4506 && (HARD_REGISTER_P (x) || lra_in_progress || reload_in_progress)
4507 && ORIGINAL_REGNO (x) == REGNO (pic_offset_table_rtx))
4508 return true;
4510 return false;
4513 #define RTX_OK_FOR_OFFSET_P(X, MODE) \
4514 (CONST_INT_P (X) \
4515 && INTVAL (X) >= -0x1000 \
4516 && INTVAL (X) <= (0x1000 - GET_MODE_SIZE (MODE)))
4518 #define RTX_OK_FOR_OLO10_P(X, MODE) \
4519 (CONST_INT_P (X) \
4520 && INTVAL (X) >= -0x1000 \
4521 && INTVAL (X) <= (0xc00 - GET_MODE_SIZE (MODE)))
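/* The 13-bit signed immediate covers [-4096, 4095], so e.g. an 8-byte
   DImode access is valid up to offset 4088.  The OLO10 variant keeps
   0x400 of headroom so that adding a %lo() part (at most 0x3ff) cannot
   overflow the immediate field.  */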
4523 /* Handle the TARGET_LEGITIMATE_ADDRESS_P target hook.
4525 On SPARC, the actual legitimate addresses must be REG+REG or REG+SMALLINT
4526 ordinarily. This changes a bit when generating PIC. */
4528 static bool
4529 sparc_legitimate_address_p (machine_mode mode, rtx addr, bool strict,
4530 code_helper)
4532 rtx rs1 = NULL, rs2 = NULL, imm1 = NULL;
4534 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
4535 rs1 = addr;
4536 else if (GET_CODE (addr) == PLUS)
4538 rs1 = XEXP (addr, 0);
4539 rs2 = XEXP (addr, 1);
4541 /* Canonicalize. REG comes first; if there are no regs,
4542 LO_SUM comes first. */
4543 if (!REG_P (rs1)
4544 && GET_CODE (rs1) != SUBREG
4545 && (REG_P (rs2)
4546 || GET_CODE (rs2) == SUBREG
4547 || (GET_CODE (rs2) == LO_SUM && GET_CODE (rs1) != LO_SUM)))
4549 rs1 = XEXP (addr, 1);
4550 rs2 = XEXP (addr, 0);
4553 if ((flag_pic == 1
4554 && sparc_pic_register_p (rs1)
4555 && !REG_P (rs2)
4556 && GET_CODE (rs2) != SUBREG
4557 && GET_CODE (rs2) != LO_SUM
4558 && GET_CODE (rs2) != MEM
4559 && !(GET_CODE (rs2) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs2))
4560 && (! symbolic_operand (rs2, VOIDmode) || mode == Pmode)
4561 && (GET_CODE (rs2) != CONST_INT || SMALL_INT (rs2)))
4562 || ((REG_P (rs1)
4563 || GET_CODE (rs1) == SUBREG)
4564 && RTX_OK_FOR_OFFSET_P (rs2, mode)))
4566 imm1 = rs2;
4567 rs2 = NULL;
4569 else if ((REG_P (rs1) || GET_CODE (rs1) == SUBREG)
4570 && (REG_P (rs2) || GET_CODE (rs2) == SUBREG))
4572 /* We prohibit REG + REG for TFmode when there are no quad move insns
4573 and we consequently need to split. We do this because REG+REG
4574 is not an offsettable address. If we get the situation in reload
4575 where source and destination of a movtf pattern are both MEMs with
4576 REG+REG address, then only one of them gets converted to an
4577 offsettable address. */
4578 if (mode == TFmode
4579 && ! (TARGET_ARCH64 && TARGET_HARD_QUAD))
4580 return 0;
4582 /* Likewise for TImode, but in all cases. */
4583 if (mode == TImode)
4584 return 0;
4586 /* We prohibit REG + REG on ARCH32 if not optimizing for
4587 DFmode/DImode because then mem_min_alignment is likely to be zero
4588 after reload and the forced split would lack a matching splitter
4589 pattern. */
4590 if (TARGET_ARCH32 && !optimize
4591 && (mode == DFmode || mode == DImode))
4592 return 0;
4594 else if (USE_AS_OFFSETABLE_LO10
4595 && GET_CODE (rs1) == LO_SUM
4596 && TARGET_ARCH64
4597 && ! TARGET_CM_MEDMID
4598 && RTX_OK_FOR_OLO10_P (rs2, mode))
4600 rs2 = NULL;
4601 imm1 = XEXP (rs1, 1);
4602 rs1 = XEXP (rs1, 0);
4603 if (!CONSTANT_P (imm1)
4604 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4605 return 0;
4608 else if (GET_CODE (addr) == LO_SUM)
4610 rs1 = XEXP (addr, 0);
4611 imm1 = XEXP (addr, 1);
4613 if (!CONSTANT_P (imm1)
4614 || (GET_CODE (rs1) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (rs1)))
4615 return 0;
4617 /* We can't allow TFmode in 32-bit mode, because an offset greater
4618 than the alignment (8) may cause the LO_SUM to overflow. */
4619 if (mode == TFmode && TARGET_ARCH32)
4620 return 0;
4622 /* During reload, accept the HIGH+LO_SUM construct generated by
4623 sparc_legitimize_reload_address. */
4624 if (reload_in_progress
4625 && GET_CODE (rs1) == HIGH
4626 && XEXP (rs1, 0) == imm1)
4627 return 1;
4629 else if (GET_CODE (addr) == CONST_INT && SMALL_INT (addr))
4630 return 1;
4631 else
4632 return 0;
4634 if (GET_CODE (rs1) == SUBREG)
4635 rs1 = SUBREG_REG (rs1);
4636 if (!REG_P (rs1))
4637 return 0;
4639 if (rs2)
4641 if (GET_CODE (rs2) == SUBREG)
4642 rs2 = SUBREG_REG (rs2);
4643 if (!REG_P (rs2))
4644 return 0;
4647 if (strict)
4649 if (!REGNO_OK_FOR_BASE_P (REGNO (rs1))
4650 || (rs2 && !REGNO_OK_FOR_BASE_P (REGNO (rs2))))
4651 return 0;
4653 else
4655 if ((! SPARC_INT_REG_P (REGNO (rs1))
4656 && REGNO (rs1) != FRAME_POINTER_REGNUM
4657 && REGNO (rs1) < FIRST_PSEUDO_REGISTER)
4658 || (rs2
4659 && (! SPARC_INT_REG_P (REGNO (rs2))
4660 && REGNO (rs2) != FRAME_POINTER_REGNUM
4661 && REGNO (rs2) < FIRST_PSEUDO_REGISTER)))
4662 return 0;
4664 return 1;
4667 /* Return the SYMBOL_REF for the tls_get_addr function. */
4669 static GTY(()) rtx sparc_tls_symbol = NULL_RTX;
4671 static rtx
4672 sparc_tls_get_addr (void)
4674 if (!sparc_tls_symbol)
4675 sparc_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_addr");
4677 return sparc_tls_symbol;
4680 /* Return the Global Offset Table to be used in TLS mode. */
4682 static rtx
4683 sparc_tls_got (void)
4685 /* In PIC mode, this is just the PIC offset table. */
4686 if (flag_pic)
4688 crtl->uses_pic_offset_table = 1;
4689 return pic_offset_table_rtx;
4692 /* In non-PIC mode, Sun as (unlike GNU as) emits PC-relative relocations for
4693 the GOT symbol with the 32-bit ABI, so we reload the GOT register. */
4694 if (TARGET_SUN_TLS && TARGET_ARCH32)
4696 load_got_register ();
4697 return got_register_rtx;
4700 /* In all other cases, we load a new pseudo with the GOT symbol. */
4701 return copy_to_reg (sparc_got ());
4704 /* Return true if X contains a thread-local symbol. */
4706 static bool
4707 sparc_tls_referenced_p (rtx x)
4709 if (!TARGET_HAVE_TLS)
4710 return false;
4712 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
4713 x = XEXP (XEXP (x, 0), 0);
4715 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x))
4716 return true;
4718 /* That's all we handle in sparc_legitimize_tls_address for now. */
4719 return false;
4722 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4723 this (thread-local) address. */
4725 static rtx
4726 sparc_legitimize_tls_address (rtx addr)
4728 rtx temp1, temp2, temp3, ret, o0, got;
4729 rtx_insn *insn;
4731 gcc_assert (can_create_pseudo_p ());
4733 if (GET_CODE (addr) == SYMBOL_REF)
4734 /* Although the various sethi/or sequences generate SImode values, many of
4735 them can be transformed by the linker when relaxing and, if relaxing to
4736 local-exec, will become a sethi/xor pair, which is signed and therefore
4737 a full DImode value in 64-bit mode. Thus we must use Pmode, lest these
4738 values be spilled onto the stack in 64-bit mode. */
4739 switch (SYMBOL_REF_TLS_MODEL (addr))
4741 case TLS_MODEL_GLOBAL_DYNAMIC:
4742 start_sequence ();
4743 temp1 = gen_reg_rtx (Pmode);
4744 temp2 = gen_reg_rtx (Pmode);
4745 ret = gen_reg_rtx (Pmode);
4746 o0 = gen_rtx_REG (Pmode, 8);
4747 got = sparc_tls_got ();
4748 emit_insn (gen_tgd_hi22 (Pmode, temp1, addr));
4749 emit_insn (gen_tgd_lo10 (Pmode, temp2, temp1, addr));
4750 emit_insn (gen_tgd_add (Pmode, o0, got, temp2, addr));
4751 insn = emit_call_insn (gen_tgd_call (Pmode, o0, sparc_tls_get_addr (),
4752 addr, const1_rtx));
4753 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4754 RTL_CONST_CALL_P (insn) = 1;
4755 insn = get_insns ();
4756 end_sequence ();
4757 emit_libcall_block (insn, ret, o0, addr);
4758 break;
4760 case TLS_MODEL_LOCAL_DYNAMIC:
4761 start_sequence ();
4762 temp1 = gen_reg_rtx (Pmode);
4763 temp2 = gen_reg_rtx (Pmode);
4764 temp3 = gen_reg_rtx (Pmode);
4765 ret = gen_reg_rtx (Pmode);
4766 o0 = gen_rtx_REG (Pmode, 8);
4767 got = sparc_tls_got ();
4768 emit_insn (gen_tldm_hi22 (Pmode, temp1));
4769 emit_insn (gen_tldm_lo10 (Pmode, temp2, temp1));
4770 emit_insn (gen_tldm_add (Pmode, o0, got, temp2));
4771 insn = emit_call_insn (gen_tldm_call (Pmode, o0, sparc_tls_get_addr (),
4772 const1_rtx));
4773 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), o0);
4774 RTL_CONST_CALL_P (insn) = 1;
4775 insn = get_insns ();
4776 end_sequence ();
4777 /* Attach a unique REG_EQUAL, to allow the RTL optimizers to
4778 share the LD_BASE result with other LD model accesses. */
4779 emit_libcall_block (insn, temp3, o0,
4780 gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
4781 UNSPEC_TLSLD_BASE));
4782 temp1 = gen_reg_rtx (Pmode);
4783 temp2 = gen_reg_rtx (Pmode);
4784 emit_insn (gen_tldo_hix22 (Pmode, temp1, addr));
4785 emit_insn (gen_tldo_lox10 (Pmode, temp2, temp1, addr));
4786 emit_insn (gen_tldo_add (Pmode, ret, temp3, temp2, addr));
4787 break;
4789 case TLS_MODEL_INITIAL_EXEC:
4790 temp1 = gen_reg_rtx (Pmode);
4791 temp2 = gen_reg_rtx (Pmode);
4792 temp3 = gen_reg_rtx (Pmode);
4793 got = sparc_tls_got ();
4794 emit_insn (gen_tie_hi22 (Pmode, temp1, addr));
4795 emit_insn (gen_tie_lo10 (Pmode, temp2, temp1, addr));
4796 if (TARGET_ARCH32)
4797 emit_insn (gen_tie_ld32 (temp3, got, temp2, addr));
4798 else
4799 emit_insn (gen_tie_ld64 (temp3, got, temp2, addr));
4800 if (TARGET_SUN_TLS)
4802 ret = gen_reg_rtx (Pmode);
4803 emit_insn (gen_tie_add (Pmode, ret, gen_rtx_REG (Pmode, 7),
4804 temp3, addr));
4806 else
4807 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp3);
4808 break;
4810 case TLS_MODEL_LOCAL_EXEC:
4811 temp1 = gen_reg_rtx (Pmode);
4812 temp2 = gen_reg_rtx (Pmode);
4813 emit_insn (gen_tle_hix22 (Pmode, temp1, addr));
4814 emit_insn (gen_tle_lox10 (Pmode, temp2, temp1, addr));
4815 ret = gen_rtx_PLUS (Pmode, gen_rtx_REG (Pmode, 7), temp2);
4816 break;
4818 default:
4819 gcc_unreachable ();
4822 else if (GET_CODE (addr) == CONST)
4824 rtx base, offset;
4826 gcc_assert (GET_CODE (XEXP (addr, 0)) == PLUS);
4828 base = sparc_legitimize_tls_address (XEXP (XEXP (addr, 0), 0));
4829 offset = XEXP (XEXP (addr, 0), 1);
4831 base = force_operand (base, NULL_RTX);
4832 if (!(GET_CODE (offset) == CONST_INT && SMALL_INT (offset)))
4833 offset = force_reg (Pmode, offset);
4834 ret = gen_rtx_PLUS (Pmode, base, offset);
4837 else
4838 gcc_unreachable (); /* for now ... */
4840 return ret;
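/* For the global-dynamic model the emitted sequence is essentially

	sethi	%tgd_hi22(sym), %o1
	add	%o1, %tgd_lo10(sym), %o1
	add	%l7, %o1, %o0, %tgd_add(sym)
	call	__tls_get_addr, %tgd_call(sym)
	 nop

   (register numbers for illustration only), which the linker may later
   relax to a cheaper TLS model.  */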
4843 /* Legitimize PIC addresses. If the address is already position-independent,
4844 we return ORIG. Newly generated position-independent addresses go into a
4845 reg. This is REG if nonzero, otherwise we allocate register(s) as
4846 necessary. */
4848 static rtx
4849 sparc_legitimize_pic_address (rtx orig, rtx reg)
4851 if (GET_CODE (orig) == SYMBOL_REF
4852 /* See the comment in sparc_expand_move. */
4853 || (GET_CODE (orig) == LABEL_REF && !can_use_mov_pic_label_ref (orig)))
4855 bool gotdata_op = false;
4856 rtx pic_ref, address;
4857 rtx_insn *insn;
4859 if (!reg)
4861 gcc_assert (can_create_pseudo_p ());
4862 reg = gen_reg_rtx (Pmode);
4865 if (flag_pic == 2)
4867 /* If not during reload, allocate another temp reg here for loading
4868 in the address, so that these instructions can be optimized
4869 properly. */
4870 rtx temp_reg = can_create_pseudo_p () ? gen_reg_rtx (Pmode) : reg;
4872 /* Must put the SYMBOL_REF inside an UNSPEC here so that cse
4873 won't get confused into thinking that these two instructions
4874 are loading in the true address of the symbol. If in the
4875 future a PIC rtx exists, that should be used instead. */
4876 if (TARGET_ARCH64)
4878 emit_insn (gen_movdi_high_pic (temp_reg, orig));
4879 emit_insn (gen_movdi_lo_sum_pic (temp_reg, temp_reg, orig));
4881 else
4883 emit_insn (gen_movsi_high_pic (temp_reg, orig));
4884 emit_insn (gen_movsi_lo_sum_pic (temp_reg, temp_reg, orig));
4887 address = temp_reg;
4888 gotdata_op = true;
4890 else
4891 address = orig;
4893 crtl->uses_pic_offset_table = 1;
4894 if (gotdata_op)
4896 if (TARGET_ARCH64)
4897 insn = emit_insn (gen_movdi_pic_gotdata_op (reg,
4898 pic_offset_table_rtx,
4899 address, orig));
4900 else
4901 insn = emit_insn (gen_movsi_pic_gotdata_op (reg,
4902 pic_offset_table_rtx,
4903 address, orig));
4905 else
4907 pic_ref
4908 = gen_const_mem (Pmode,
4909 gen_rtx_PLUS (Pmode,
4910 pic_offset_table_rtx, address));
4911 insn = emit_move_insn (reg, pic_ref);
4914 /* Put a REG_EQUAL note on this insn, so that it can be optimized
4915 by the loop pass. */
4916 set_unique_reg_note (insn, REG_EQUAL, orig);
4917 return reg;
4919 else if (GET_CODE (orig) == CONST)
4921 rtx base, offset;
4923 if (GET_CODE (XEXP (orig, 0)) == PLUS
4924 && sparc_pic_register_p (XEXP (XEXP (orig, 0), 0)))
4925 return orig;
4927 if (!reg)
4929 gcc_assert (can_create_pseudo_p ());
4930 reg = gen_reg_rtx (Pmode);
4933 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
4934 base = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 0), reg);
4935 offset = sparc_legitimize_pic_address (XEXP (XEXP (orig, 0), 1),
4936 base == reg ? NULL_RTX : reg);
4938 if (GET_CODE (offset) == CONST_INT)
4940 if (SMALL_INT (offset))
4941 return plus_constant (Pmode, base, INTVAL (offset));
4942 else if (can_create_pseudo_p ())
4943 offset = force_reg (Pmode, offset);
4944 else
4945 /* If we reach here, then something is seriously wrong. */
4946 gcc_unreachable ();
4948 return gen_rtx_PLUS (Pmode, base, offset);
4950 else if (GET_CODE (orig) == LABEL_REF)
4951 /* ??? We ought to be checking that the register is live instead, in case
4952 it is eliminated. */
4953 crtl->uses_pic_offset_table = 1;
4955 return orig;
4958 /* Try machine-dependent ways of modifying an illegitimate address X
4959 to be legitimate. If we find one, return the new, valid address.
4961 OLDX is the address as it was before break_out_memory_refs was called.
4962 In some cases it is useful to look at this to decide what needs to be done.
4964 MODE is the mode of the operand pointed to by X.
4966 On SPARC, change REG+N into REG+REG, and REG+(X*Y) into REG+REG. */
4968 static rtx
4969 sparc_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
4970 machine_mode mode)
4972 rtx orig_x = x;
4974 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == MULT)
4975 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4976 force_operand (XEXP (x, 0), NULL_RTX));
4977 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == MULT)
4978 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4979 force_operand (XEXP (x, 1), NULL_RTX));
4980 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS)
4981 x = gen_rtx_PLUS (Pmode, force_operand (XEXP (x, 0), NULL_RTX),
4982 XEXP (x, 1));
4983 if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 1)) == PLUS)
4984 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4985 force_operand (XEXP (x, 1), NULL_RTX));
4987 if (x != orig_x && sparc_legitimate_address_p (mode, x, FALSE))
4988 return x;
4990 if (sparc_tls_referenced_p (x))
4991 x = sparc_legitimize_tls_address (x);
4992 else if (flag_pic)
4993 x = sparc_legitimize_pic_address (x, NULL_RTX);
4994 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 1)))
4995 x = gen_rtx_PLUS (Pmode, XEXP (x, 0),
4996 copy_to_mode_reg (Pmode, XEXP (x, 1)));
4997 else if (GET_CODE (x) == PLUS && CONSTANT_ADDRESS_P (XEXP (x, 0)))
4998 x = gen_rtx_PLUS (Pmode, XEXP (x, 1),
4999 copy_to_mode_reg (Pmode, XEXP (x, 0)));
5000 else if (GET_CODE (x) == SYMBOL_REF
5001 || GET_CODE (x) == CONST
5002 || GET_CODE (x) == LABEL_REF)
5003 x = copy_to_suggested_reg (x, NULL_RTX, Pmode);
5005 return x;
5008 /* Delegitimize an address that was legitimized by the above function. */
5010 static rtx
5011 sparc_delegitimize_address (rtx x)
5013 x = delegitimize_mem_from_attrs (x);
5015 if (GET_CODE (x) == LO_SUM)
5016 x = XEXP (x, 1);
5018 if (GET_CODE (x) == UNSPEC)
5019 switch (XINT (x, 1))
5021 case UNSPEC_MOVE_PIC:
5022 case UNSPEC_TLSLE:
5023 x = XVECEXP (x, 0, 0);
5024 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5025 break;
5026 case UNSPEC_MOVE_GOTDATA:
5027 x = XVECEXP (x, 0, 2);
5028 gcc_assert (GET_CODE (x) == SYMBOL_REF);
5029 break;
5030 default:
5031 break;
5034 /* This is generated by mov{si,di}_pic_label_ref in PIC mode. */
5035 if (GET_CODE (x) == MINUS
5036 && (XEXP (x, 0) == got_register_rtx
5037 || sparc_pic_register_p (XEXP (x, 0))))
5039 rtx y = XEXP (x, 1);
5041 if (GET_CODE (y) == LO_SUM)
5042 y = XEXP (y, 1);
5044 if (GET_CODE (y) == UNSPEC && XINT (y, 1) == UNSPEC_MOVE_PIC_LABEL)
5046 x = XVECEXP (y, 0, 0);
5047 gcc_assert (GET_CODE (x) == LABEL_REF
5048 || (GET_CODE (x) == CONST
5049 && GET_CODE (XEXP (x, 0)) == PLUS
5050 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
5051 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT));
5055 return x;
5058 /* SPARC implementation of LEGITIMIZE_RELOAD_ADDRESS. Returns a value to
5059 replace the input X, or the original X if no replacement is called for.
5060 The output parameter *WIN is 1 if the calling macro should goto WIN,
5061 0 if it should not.
5063 For SPARC, we wish to handle addresses by splitting them into
5064 HIGH+LO_SUM pairs, retaining the LO_SUM in the memory reference.
5065 This cuts the number of extra insns by one.
5067 Do nothing when generating PIC code and the address is a symbolic
5068 operand or requires a scratch register. */
5070 rtx
5071 sparc_legitimize_reload_address (rtx x, machine_mode mode,
5072 int opnum, int type,
5073 int ind_levels ATTRIBUTE_UNUSED, int *win)
5075 /* Decompose SImode constants into HIGH+LO_SUM. */
5076 if (CONSTANT_P (x)
5077 && (mode != TFmode || TARGET_ARCH64)
5078 && GET_MODE (x) == SImode
5079 && GET_CODE (x) != LO_SUM
5080 && GET_CODE (x) != HIGH
5081 && sparc_code_model <= CM_MEDLOW
5082 && !(flag_pic
5083 && (symbolic_operand (x, Pmode) || pic_address_needs_scratch (x))))
5085 x = gen_rtx_LO_SUM (GET_MODE (x), gen_rtx_HIGH (GET_MODE (x), x), x);
5086 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5087 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5088 opnum, (enum reload_type)type);
5089 *win = 1;
5090 return x;
5093 /* We have to recognize what we have already generated above. */
5094 if (GET_CODE (x) == LO_SUM && GET_CODE (XEXP (x, 0)) == HIGH)
5096 push_reload (XEXP (x, 0), NULL_RTX, &XEXP (x, 0), NULL,
5097 BASE_REG_CLASS, GET_MODE (x), VOIDmode, 0, 0,
5098 opnum, (enum reload_type)type);
5099 *win = 1;
5100 return x;
5103 *win = 0;
5104 return x;
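/* A constant address decomposed this way is then reached with e.g.

	sethi	%hi(sym), %g1
	ld	[%g1+%lo(sym)], %o0

   keeping the LO_SUM inside the memory reference as described above.  */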
5107 /* Return true if ADDR (a legitimate address expression)
5108 has an effect that depends on the machine mode it is used for.
5110 In PIC mode,
5112 (mem:HI [%l7+a])
5114 is not equivalent to
5116 (mem:QI [%l7+a]) (mem:QI [%l7+a+1])
5118 because [%l7+a+1] is interpreted as the address of (a+1). */
5121 static bool
5122 sparc_mode_dependent_address_p (const_rtx addr,
5123 addr_space_t as ATTRIBUTE_UNUSED)
5125 if (GET_CODE (addr) == PLUS
5126 && sparc_pic_register_p (XEXP (addr, 0))
5127 && symbolic_operand (XEXP (addr, 1), VOIDmode))
5128 return true;
5130 return false;
5133 /* Emit a call instruction with the pattern given by PAT. ADDR is the
5134 address of the call target. */
5136 void
5137 sparc_emit_call_insn (rtx pat, rtx addr)
5139 rtx_insn *insn;
5141 insn = emit_call_insn (pat);
5143 /* The PIC register is live on entry to VxWorks PIC PLT entries. */
5144 if (TARGET_VXWORKS_RTP
5145 && flag_pic
5146 && GET_CODE (addr) == SYMBOL_REF
5147 && (SYMBOL_REF_DECL (addr)
5148 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
5149 : !SYMBOL_REF_LOCAL_P (addr)))
5151 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), pic_offset_table_rtx);
5152 crtl->uses_pic_offset_table = 1;
5156 /* Return 1 if RTX is a MEM which is known to be aligned to at
5157 least a DESIRED byte boundary. */
5159 int
5160 mem_min_alignment (rtx mem, int desired)
5162 rtx addr, base, offset;
5164 /* If it's not a MEM we can't accept it. */
5165 if (GET_CODE (mem) != MEM)
5166 return 0;
5168 /* Obviously... */
5169 if (!TARGET_UNALIGNED_DOUBLES
5170 && MEM_ALIGN (mem) / BITS_PER_UNIT >= (unsigned)desired)
5171 return 1;
5173 /* ??? The rest of the function predates MEM_ALIGN so
5174 there is probably a bit of redundancy. */
5175 addr = XEXP (mem, 0);
5176 base = offset = NULL_RTX;
5177 if (GET_CODE (addr) == PLUS)
5179 if (GET_CODE (XEXP (addr, 0)) == REG)
5181 base = XEXP (addr, 0);
5183 /* What we are saying here is that if the base
5184 REG is aligned properly, the compiler will make
5185 sure any REG based index upon it will be so
5186 as well. */
5187 if (GET_CODE (XEXP (addr, 1)) == CONST_INT)
5188 offset = XEXP (addr, 1);
5189 else
5190 offset = const0_rtx;
5193 else if (GET_CODE (addr) == REG)
5195 base = addr;
5196 offset = const0_rtx;
5199 if (base != NULL_RTX)
5201 int regno = REGNO (base);
5203 if (regno != HARD_FRAME_POINTER_REGNUM && regno != STACK_POINTER_REGNUM)
5205 /* Check if the compiler has recorded some information
5206 about the alignment of the base REG. If reload has
5207 completed, we already matched with proper alignments.
5208 If not running global_alloc, reload might give us
5209 an unaligned pointer to the local stack, though. */
5210 if (((cfun != 0
5211 && REGNO_POINTER_ALIGN (regno) >= desired * BITS_PER_UNIT)
5212 || (optimize && reload_completed))
5213 && (INTVAL (offset) & (desired - 1)) == 0)
5214 return 1;
5216 else
5218 if (((INTVAL (offset) - SPARC_STACK_BIAS) & (desired - 1)) == 0)
5219 return 1;
5222 else if (! TARGET_UNALIGNED_DOUBLES
5223 || CONSTANT_P (addr)
5224 || GET_CODE (addr) == LO_SUM)
5226 /* Anything else we know is properly aligned unless TARGET_UNALIGNED_DOUBLES
5227 is true, in which case we can only assume that an access is aligned if
5228 it is to a constant address, or the address involves a LO_SUM. */
5229 return 1;
5232 /* An obviously unaligned address. */
5233 return 0;
5237 /* Vectors to keep interesting information about registers where it can easily
5238 be accessed. We used to use the actual mode value as the bit number, but there
5239 are more than 32 modes now. Instead we use two tables: one indexed by
5240 hard register number, and one indexed by mode. */
5242 /* The purpose of sparc_mode_class is to shrink the range of modes so that
5243 they all fit (as bit numbers) in a 32-bit word (again). Each real mode is
5244 mapped into one sparc_mode_class mode. */
5246 enum sparc_mode_class {
5247 H_MODE, S_MODE, D_MODE, T_MODE, O_MODE,
5248 SF_MODE, DF_MODE, TF_MODE, OF_MODE,
5249 CC_MODE, CCFP_MODE
5252 /* Modes for single-word and smaller quantities. */
5253 #define S_MODES \
5254 ((1 << (int) H_MODE) | (1 << (int) S_MODE) | (1 << (int) SF_MODE))
5256 /* Modes for double-word and smaller quantities. */
5257 #define D_MODES (S_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5259 /* Modes for quad-word and smaller quantities. */
5260 #define T_MODES (D_MODES | (1 << (int) T_MODE) | (1 << (int) TF_MODE))
5262 /* Modes for 8-word and smaller quantities. */
5263 #define O_MODES (T_MODES | (1 << (int) O_MODE) | (1 << (int) OF_MODE))
5265 /* Modes for single-float quantities. */
5266 #define SF_MODES ((1 << (int) S_MODE) | (1 << (int) SF_MODE))
5268 /* Modes for double-float and smaller quantities. */
5269 #define DF_MODES (SF_MODES | (1 << (int) D_MODE) | (1 << (int) DF_MODE))
5271 /* Modes for quad-float and smaller quantities. */
5272 #define TF_MODES (DF_MODES | (1 << (int) TF_MODE))
5274 /* Modes for quad-float pairs and smaller quantities. */
5275 #define OF_MODES (TF_MODES | (1 << (int) OF_MODE))
5277 /* Modes for double-float only quantities. */
5278 #define DF_MODES_NO_S ((1 << (int) D_MODE) | (1 << (int) DF_MODE))
5280 /* Modes for quad-float and double-float only quantities. */
5281 #define TF_MODES_NO_S (DF_MODES_NO_S | (1 << (int) TF_MODE))
5283 /* Modes for quad-float pairs and double-float only quantities. */
5284 #define OF_MODES_NO_S (TF_MODES_NO_S | (1 << (int) OF_MODE))
5286 /* Modes for condition codes. */
5287 #define CC_MODES (1 << (int) CC_MODE)
5288 #define CCFP_MODES (1 << (int) CCFP_MODE)
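/* Thus e.g. D_MODES accepts any integer or float quantity of at most
   8 bytes, while the _NO_S variants used below for the upper FP
   registers %f32-%f63 exclude the 4-byte modes that those registers
   cannot hold.  */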
5290 /* Value is 1 if register/mode pair is acceptable on sparc.
5292 The funny mixture of D and T modes is because integer operations
5293 do not specially operate on tetra quantities, so non-quad-aligned
5294 registers can hold quadword quantities (except %o4 and %i4 because
5295 they cross fixed registers).
5297 ??? Note that, despite the settings, non-double-aligned parameter
5298 registers can hold double-word quantities in 32-bit mode. */
5300 /* This points to either the 32-bit or the 64-bit version. */
5301 static const int *hard_regno_mode_classes;
5303 static const int hard_32bit_mode_classes[] = {
5304 S_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5305 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5306 T_MODES, S_MODES, T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES,
5307 T_MODES, S_MODES, T_MODES, S_MODES, D_MODES, S_MODES, D_MODES, S_MODES,
5309 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5310 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5311 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5312 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5314 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5315 and none can hold SFmode/SImode values. */
5316 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5317 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5318 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5319 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5321 /* %fcc[0123] */
5322 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5324 /* %icc, %sfp, %gsr */
5325 CC_MODES, 0, D_MODES
5328 static const int hard_64bit_mode_classes[] = {
5329 D_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5330 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5331 T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5332 O_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES, T_MODES, D_MODES,
5334 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5335 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5336 OF_MODES, SF_MODES, DF_MODES, SF_MODES, OF_MODES, SF_MODES, DF_MODES, SF_MODES,
5337 OF_MODES, SF_MODES, DF_MODES, SF_MODES, TF_MODES, SF_MODES, DF_MODES, SF_MODES,
5339 /* FP regs f32 to f63. Only the even numbered registers actually exist,
5340 and none can hold SFmode/SImode values. */
5341 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5342 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5343 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, OF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5344 OF_MODES_NO_S, 0, DF_MODES_NO_S, 0, TF_MODES_NO_S, 0, DF_MODES_NO_S, 0,
5346 /* %fcc[0123] */
5347 CCFP_MODES, CCFP_MODES, CCFP_MODES, CCFP_MODES,
5349 /* %icc, %sfp, %gsr */
5350 CC_MODES, 0, D_MODES
5353 static int sparc_mode_class [NUM_MACHINE_MODES];
5355 enum reg_class sparc_regno_reg_class[FIRST_PSEUDO_REGISTER];
5357 static void
5358 sparc_init_modes (void)
5360 int i;
5362 for (i = 0; i < NUM_MACHINE_MODES; i++)
5364 machine_mode m = (machine_mode) i;
5365 unsigned int size = GET_MODE_SIZE (m);
5367 switch (GET_MODE_CLASS (m))
5369 case MODE_INT:
5370 case MODE_PARTIAL_INT:
5371 case MODE_COMPLEX_INT:
5372 if (size < 4)
5373 sparc_mode_class[i] = 1 << (int) H_MODE;
5374 else if (size == 4)
5375 sparc_mode_class[i] = 1 << (int) S_MODE;
5376 else if (size == 8)
5377 sparc_mode_class[i] = 1 << (int) D_MODE;
5378 else if (size == 16)
5379 sparc_mode_class[i] = 1 << (int) T_MODE;
5380 else if (size == 32)
5381 sparc_mode_class[i] = 1 << (int) O_MODE;
5382 else
5383 sparc_mode_class[i] = 0;
5384 break;
5385 case MODE_VECTOR_INT:
5386 if (size == 4)
5387 sparc_mode_class[i] = 1 << (int) SF_MODE;
5388 else if (size == 8)
5389 sparc_mode_class[i] = 1 << (int) DF_MODE;
5390 else
5391 sparc_mode_class[i] = 0;
5392 break;
5393 case MODE_FLOAT:
5394 case MODE_COMPLEX_FLOAT:
5395 if (size == 4)
5396 sparc_mode_class[i] = 1 << (int) SF_MODE;
5397 else if (size == 8)
5398 sparc_mode_class[i] = 1 << (int) DF_MODE;
5399 else if (size == 16)
5400 sparc_mode_class[i] = 1 << (int) TF_MODE;
5401 else if (size == 32)
5402 sparc_mode_class[i] = 1 << (int) OF_MODE;
5403 else
5404 sparc_mode_class[i] = 0;
5405 break;
5406 case MODE_CC:
5407 if (m == CCFPmode || m == CCFPEmode)
5408 sparc_mode_class[i] = 1 << (int) CCFP_MODE;
5409 else
5410 sparc_mode_class[i] = 1 << (int) CC_MODE;
5411 break;
5412 default:
5413 sparc_mode_class[i] = 0;
5414 break;
5418 if (TARGET_ARCH64)
5419 hard_regno_mode_classes = hard_64bit_mode_classes;
5420 else
5421 hard_regno_mode_classes = hard_32bit_mode_classes;
5423 /* Initialize the array used by REGNO_REG_CLASS. */
5424 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5426 if (i < 16 && TARGET_V8PLUS)
5427 sparc_regno_reg_class[i] = I64_REGS;
5428 else if (i < 32 || i == FRAME_POINTER_REGNUM)
5429 sparc_regno_reg_class[i] = GENERAL_REGS;
5430 else if (i < 64)
5431 sparc_regno_reg_class[i] = FP_REGS;
5432 else if (i < 96)
5433 sparc_regno_reg_class[i] = EXTRA_FP_REGS;
5434 else if (i < 100)
5435 sparc_regno_reg_class[i] = FPCC_REGS;
5436 else
5437 sparc_regno_reg_class[i] = NO_REGS;
5441 /* Return whether REGNO, a global or FP register, must be saved/restored. */
5443 static inline bool
5444 save_global_or_fp_reg_p (unsigned int regno,
5445 int leaf_function ATTRIBUTE_UNUSED)
5447 return !call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno);
5450 /* Return whether the return address register (%i7) is needed. */
5452 static inline bool
5453 return_addr_reg_needed_p (int leaf_function)
5455 /* If it is live, for example because of __builtin_return_address (0). */
5456 if (df_regs_ever_live_p (RETURN_ADDR_REGNUM))
5457 return true;
5459 /* Otherwise, it is needed as save register if %o7 is clobbered. */
5460 if (!leaf_function
5461 /* Loading the GOT register clobbers %o7. */
5462 || crtl->uses_pic_offset_table
5463 || df_regs_ever_live_p (INCOMING_RETURN_ADDR_REGNUM))
5464 return true;
5466 return false;
5469 /* Return whether REGNO, a local or in register, must be saved/restored. */
5471 static bool
5472 save_local_or_in_reg_p (unsigned int regno, int leaf_function)
5474 /* General case: call-saved registers live at some point. */
5475 if (!call_used_or_fixed_reg_p (regno) && df_regs_ever_live_p (regno))
5476 return true;
5478 /* Frame pointer register (%fp) if needed. */
5479 if (regno == HARD_FRAME_POINTER_REGNUM && frame_pointer_needed)
5480 return true;
5482 /* Return address register (%i7) if needed. */
5483 if (regno == RETURN_ADDR_REGNUM && return_addr_reg_needed_p (leaf_function))
5484 return true;
5486 /* GOT register (%l7) if needed. */
5487 if (got_register_rtx && regno == REGNO (got_register_rtx))
5488 return true;
5490 /* If the function accesses prior frames, the frame pointer and the return
5491 address of the previous frame must be saved on the stack. */
5492 if (crtl->accesses_prior_frames
5493 && (regno == HARD_FRAME_POINTER_REGNUM || regno == RETURN_ADDR_REGNUM))
5494 return true;
5496 return false;
5499 /* Compute the frame size required by the function. This function is called
5500 during the reload pass and also by sparc_expand_prologue. */
5502 static HOST_WIDE_INT
5503 sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
5505 HOST_WIDE_INT frame_size, apparent_frame_size;
5506 int args_size, n_global_fp_regs = 0;
5507 bool save_local_in_regs_p = false;
5508 unsigned int i;
5510 /* If the function allocates dynamic stack space, the dynamic offset is
5511 computed early and contains REG_PARM_STACK_SPACE, so we need to cope. */
5512 if (leaf_function && !cfun->calls_alloca)
5513 args_size = 0;
5514 else
5515 args_size = crtl->outgoing_args_size + REG_PARM_STACK_SPACE (cfun->decl);
5517 /* Calculate space needed for global registers. */
5518 if (TARGET_ARCH64)
5520 for (i = 0; i < 8; i++)
5521 if (save_global_or_fp_reg_p (i, 0))
5522 n_global_fp_regs += 2;
5524 else
5526 for (i = 0; i < 8; i += 2)
5527 if (save_global_or_fp_reg_p (i, 0)
5528 || save_global_or_fp_reg_p (i + 1, 0))
5529 n_global_fp_regs += 2;
5532 /* In the flat window model, find out which local and in registers need to
5533 be saved. We don't reserve space in the current frame for them as they
5534 will be spilled into the register window save area of the caller's frame.
5535 However, as soon as we use this register window save area, we must create
5536 that of the current frame to make it the live one. */
5537 if (TARGET_FLAT)
5538 for (i = 16; i < 32; i++)
5539 if (save_local_or_in_reg_p (i, leaf_function))
5541 save_local_in_regs_p = true;
5542 break;
5545 /* Calculate space needed for FP registers. */
5546 for (i = 32; i < (TARGET_V9 ? 96 : 64); i += 2)
5547 if (save_global_or_fp_reg_p (i, 0) || save_global_or_fp_reg_p (i + 1, 0))
5548 n_global_fp_regs += 2;
5550 if (size == 0
5551 && n_global_fp_regs == 0
5552 && args_size == 0
5553 && !save_local_in_regs_p)
5554 frame_size = apparent_frame_size = 0;
5555 else
5557 /* Start from the apparent frame size. */
5558 apparent_frame_size = ROUND_UP (size, 8) + n_global_fp_regs * 4;
5560 /* We need to add the size of the outgoing argument area. */
5561 frame_size = apparent_frame_size + ROUND_UP (args_size, 8);
5563 /* And that of the register window save area. */
5564 frame_size += FIRST_PARM_OFFSET (cfun->decl);
5566 /* Finally, bump to the appropriate alignment. */
5567 frame_size = SPARC_STACK_ALIGN (frame_size);
5570 /* Set up values for use in prologue and epilogue. */
5571 sparc_frame_size = frame_size;
5572 sparc_apparent_frame_size = apparent_frame_size;
5573 sparc_n_global_fp_regs = n_global_fp_regs;
5574 sparc_save_local_in_regs_p = save_local_in_regs_p;
5576 return frame_size;
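/* For instance, with 40 bytes of locals and no saved global or FP
   registers in a non-leaf function, the apparent frame size is 40; the
   full frame size additionally includes the rounded outgoing argument
   area and the register window save area, and is then rounded up to
   the stack alignment.  */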
5579 /* Implement the macro INITIAL_ELIMINATION_OFFSET, return the OFFSET. */
5581 HOST_WIDE_INT
5582 sparc_initial_elimination_offset (int to)
5584 int offset;
5586 if (to == STACK_POINTER_REGNUM)
5587 offset = sparc_compute_frame_size (get_frame_size (), crtl->is_leaf);
5588 else
5589 offset = 0;
5591 offset += SPARC_STACK_BIAS;
5592 return offset;
5595 /* Output any necessary .register pseudo-ops. */
5597 void
5598 sparc_output_scratch_registers (FILE *file ATTRIBUTE_UNUSED)
5600 int i;
5602 if (TARGET_ARCH32)
5603 return;
5605 /* Check if %g[2367] were used without
5606 .register being printed for them already. */
5607 for (i = 2; i < 8; i++)
5609 if (df_regs_ever_live_p (i)
5610 && ! sparc_hard_reg_printed [i])
5612 sparc_hard_reg_printed [i] = 1;
5613 /* %g7 is used as the TLS base register, so use #ignore
5614 for it instead of #scratch. */
5615 fprintf (file, "\t.register\t%%g%d, #%s\n", i,
5616 i == 7 ? "ignore" : "scratch");
5618 if (i == 3) i = 5;
5622 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
5624 #if PROBE_INTERVAL > 4096
5625 #error Cannot use indexed addressing mode for stack probing
5626 #endif
5628 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
5629 inclusive. These are offsets from the current stack pointer.
5631 Note that we don't use the REG+REG addressing mode for the probes because
5632 of the stack bias in 64-bit mode. And it doesn't really buy us anything,
5633 so the advantage of having a single code path wins here. */
5635 static void
5636 sparc_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size)
5638 rtx g1 = gen_rtx_REG (Pmode, 1);
5640 /* See if we have a constant small number of probes to generate. If so,
5641 that's the easy case. */
5642 if (size <= PROBE_INTERVAL)
5644 emit_move_insn (g1, GEN_INT (first));
5645 emit_insn (gen_rtx_SET (g1,
5646 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5647 emit_stack_probe (plus_constant (Pmode, g1, -size));
5650 /* The run-time loop is made up of 9 insns in the generic case while the
5651 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
5652 else if (size <= 4 * PROBE_INTERVAL)
5654 HOST_WIDE_INT i;
5656 emit_move_insn (g1, GEN_INT (first + PROBE_INTERVAL));
5657 emit_insn (gen_rtx_SET (g1,
5658 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5659 emit_stack_probe (g1);
5661 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
5662 it exceeds SIZE. If only two probes are needed, this will not
5663 generate any code. Then probe at FIRST + SIZE. */
5664 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
5666 emit_insn (gen_rtx_SET (g1,
5667 plus_constant (Pmode, g1, -PROBE_INTERVAL)));
5668 emit_stack_probe (g1);
5671 emit_stack_probe (plus_constant (Pmode, g1,
5672 (i - PROBE_INTERVAL) - size));
5675 /* Otherwise, do the same as above, but in a loop. Note that we must be
5676 extra careful with variables wrapping around because we might be at
5677 the very top (or the very bottom) of the address space and we have
5678 to be able to handle this case properly; in particular, we use an
5679 equality test for the loop condition. */
5680 else
5682 HOST_WIDE_INT rounded_size;
5683 rtx g4 = gen_rtx_REG (Pmode, 4);
5685 emit_move_insn (g1, GEN_INT (first));
5688 /* Step 1: round SIZE to the previous multiple of the interval. */
5690 rounded_size = ROUND_DOWN (size, PROBE_INTERVAL);
5691 emit_move_insn (g4, GEN_INT (rounded_size));
5694 /* Step 2: compute initial and final value of the loop counter. */
5696 /* TEST_ADDR = SP + FIRST. */
5697 emit_insn (gen_rtx_SET (g1,
5698 gen_rtx_MINUS (Pmode, stack_pointer_rtx, g1)));
5700 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
5701 emit_insn (gen_rtx_SET (g4, gen_rtx_MINUS (Pmode, g1, g4)));
5704 /* Step 3: the loop
5706 while (TEST_ADDR != LAST_ADDR)
5708 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
5709 probe at TEST_ADDR
5712 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
5713 until it is equal to ROUNDED_SIZE. */
5715 emit_insn (gen_probe_stack_range (Pmode, g1, g1, g4));
5718 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
5719 that SIZE is equal to ROUNDED_SIZE. */
5721 if (size != rounded_size)
5722 emit_stack_probe (plus_constant (Pmode, g4, rounded_size - size));
5725 /* Make sure nothing is scheduled before we are done. */
5726 emit_insn (gen_blockage ());
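/* For example, with FIRST = 0 and SIZE = 3 * 4096, the unrolled case
   above probes the three words at SP - 4096, SP - 8192 and SP - 12288,
   one word per page of the new stack area.  */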
5729 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
5730 absolute addresses. */
5732 const char *
5733 output_probe_stack_range (rtx reg1, rtx reg2)
5735 static int labelno = 0;
5736 char loop_lab[32];
5737 rtx xops[2];
5739 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
5741 /* Loop. */
5742 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
5744 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
5745 xops[0] = reg1;
5746 xops[1] = GEN_INT (-PROBE_INTERVAL);
5747 output_asm_insn ("add\t%0, %1, %0", xops);
5749 /* Test if TEST_ADDR == LAST_ADDR. */
5750 xops[1] = reg2;
5751 output_asm_insn ("cmp\t%0, %1", xops);
5753 /* Probe at TEST_ADDR and branch. */
5754 if (TARGET_ARCH64)
5755 fputs ("\tbne,pt\t%xcc,", asm_out_file);
5756 else
5757 fputs ("\tbne\t", asm_out_file);
5758 assemble_name_raw (asm_out_file, loop_lab);
5759 fputc ('\n', asm_out_file);
5760 xops[1] = GEN_INT (SPARC_STACK_BIAS);
5761 output_asm_insn (" st\t%%g0, [%0+%1]", xops);
5763 return "";
5766 /* Emit code to save/restore registers from LOW to HIGH at BASE+OFFSET as
5767 needed. LOW is supposed to be double-word aligned for 32-bit registers.
5768 SAVE_P decides whether a register must be saved/restored. ACTION_TRUE
5769 is the action to be performed if SAVE_P returns true and ACTION_FALSE
5770 the action to be performed if it returns false. Return the new offset. */
5772 typedef bool (*sorr_pred_t) (unsigned int, int);
5773 typedef enum { SORR_NONE, SORR_ADVANCE, SORR_SAVE, SORR_RESTORE } sorr_act_t;
5775 static int
5776 emit_save_or_restore_regs (unsigned int low, unsigned int high, rtx base,
5777 int offset, int leaf_function, sorr_pred_t save_p,
5778 sorr_act_t action_true, sorr_act_t action_false)
5780 unsigned int i;
5781 rtx mem;
5782 rtx_insn *insn;
5784 if (TARGET_ARCH64 && high <= 32)
5786 int fp_offset = -1;
5788 for (i = low; i < high; i++)
5790 if (save_p (i, leaf_function))
5792 mem = gen_frame_mem (DImode, plus_constant (Pmode,
5793 base, offset));
5794 if (action_true == SORR_SAVE)
5796 insn = emit_move_insn (mem, gen_rtx_REG (DImode, i));
5797 RTX_FRAME_RELATED_P (insn) = 1;
5799 else /* action_true == SORR_RESTORE */
5801 /* The frame pointer must be restored last since its old
5802 value may be used as the base address for the frame. This
5803 is problematic in 64-bit mode only because of the lack
5804 of a double-word load instruction. */
5805 if (i == HARD_FRAME_POINTER_REGNUM)
5806 fp_offset = offset;
5807 else
5808 emit_move_insn (gen_rtx_REG (DImode, i), mem);
5810 offset += 8;
5812 else if (action_false == SORR_ADVANCE)
5813 offset += 8;
5816 if (fp_offset >= 0)
5818 mem = gen_frame_mem (DImode, plus_constant (Pmode, base, fp_offset));
5819 emit_move_insn (hard_frame_pointer_rtx, mem);
5822 else
5824 for (i = low; i < high; i += 2)
5826 bool reg0 = save_p (i, leaf_function);
5827 bool reg1 = save_p (i + 1, leaf_function);
5828 machine_mode mode;
5829 int regno;
5831 if (reg0 && reg1)
5833 mode = SPARC_INT_REG_P (i) ? E_DImode : E_DFmode;
5834 regno = i;
5836 else if (reg0)
5838 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5839 regno = i;
5841 else if (reg1)
5843 mode = SPARC_INT_REG_P (i) ? E_SImode : E_SFmode;
5844 regno = i + 1;
5845 offset += 4;
5847 else
5849 if (action_false == SORR_ADVANCE)
5850 offset += 8;
5851 continue;
5854 mem = gen_frame_mem (mode, plus_constant (Pmode, base, offset));
5855 if (action_true == SORR_SAVE)
5857 insn = emit_move_insn (mem, gen_rtx_REG (mode, regno));
5858 RTX_FRAME_RELATED_P (insn) = 1;
5859 if (mode == DImode)
5861 rtx set1, set2;
5862 mem = gen_frame_mem (SImode, plus_constant (Pmode, base,
5863 offset));
5864 set1 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno));
5865 RTX_FRAME_RELATED_P (set1) = 1;
5866 mem
5867 = gen_frame_mem (SImode, plus_constant (Pmode, base,
5868 offset + 4));
5869 set2 = gen_rtx_SET (mem, gen_rtx_REG (SImode, regno + 1));
5870 RTX_FRAME_RELATED_P (set2) = 1;
5871 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
5872 gen_rtx_PARALLEL (VOIDmode,
5873 gen_rtvec (2, set1, set2)));
5876 else /* action_true == SORR_RESTORE */
5877 emit_move_insn (gen_rtx_REG (mode, regno), mem);
5879 /* Bump and round down to double word
5880 in case we already bumped by 4. */
5881 offset = ROUND_DOWN (offset + 8, 8);
5885 return offset;
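   /* For example, on the 32-bit path above, two adjacent live registers
      such as %l0/%l1 are saved with a single DImode (std) move annotated
      with a two-store REG_FRAME_RELATED_EXPR note, while a lone second
      register is saved as SImode at OFFSET + 4 and the running offset is
      then rounded to the next double word.  */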
5888 /* Emit code to adjust BASE to OFFSET. Return the new base. */
5890 static rtx
5891 emit_adjust_base_to_offset (rtx base, int offset)
5893 /* ??? This might be optimized a little as %g1 might already have a
5894 value close enough that a single add insn will do. */
5895 /* ??? Although, all of this is probably only a temporary fix because
5896 if %g1 can hold a function result, then sparc_expand_epilogue will
5897 lose (the result will be clobbered). */
5898 rtx new_base = gen_rtx_REG (Pmode, 1);
5899 emit_move_insn (new_base, GEN_INT (offset));
5900 emit_insn (gen_rtx_SET (new_base, gen_rtx_PLUS (Pmode, base, new_base)));
5901 return new_base;
5904 /* Emit code to save/restore call-saved global and FP registers. */
5906 static void
5907 emit_save_or_restore_global_fp_regs (rtx base, int offset, sorr_act_t action)
5909 if (offset < -4096 || offset + sparc_n_global_fp_regs * 4 > 4095)
5911 base = emit_adjust_base_to_offset (base, offset);
5912 offset = 0;
5915 offset
5916 = emit_save_or_restore_regs (0, 8, base, offset, 0,
5917 save_global_or_fp_reg_p, action, SORR_NONE);
5918 emit_save_or_restore_regs (32, TARGET_V9 ? 96 : 64, base, offset, 0,
5919 save_global_or_fp_reg_p, action, SORR_NONE);
5922 /* Emit code to save/restore call-saved local and in registers. */
5924 static void
5925 emit_save_or_restore_local_in_regs (rtx base, int offset, sorr_act_t action)
5927 if (offset < -4096 || offset + 16 * UNITS_PER_WORD > 4095)
5929 base = emit_adjust_base_to_offset (base, offset);
5930 offset = 0;
5933 emit_save_or_restore_regs (16, 32, base, offset, sparc_leaf_function_p,
5934 save_local_or_in_reg_p, action, SORR_ADVANCE);
5937 /* Emit a window_save insn. */
5939 static rtx_insn *
5940 emit_window_save (rtx increment)
5942 rtx_insn *insn = emit_insn (gen_window_save (increment));
5943 RTX_FRAME_RELATED_P (insn) = 1;
5945 /* The incoming return address (%o7) is saved in %i7. */
5946 add_reg_note (insn, REG_CFA_REGISTER,
5947 gen_rtx_SET (gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM),
5948 gen_rtx_REG (Pmode,
5949 INCOMING_RETURN_ADDR_REGNUM)));
5951 /* The window save event. */
5952 add_reg_note (insn, REG_CFA_WINDOW_SAVE, const0_rtx);
5954 /* The CFA is %fp, the hard frame pointer. */
5955 add_reg_note (insn, REG_CFA_DEF_CFA,
5956 plus_constant (Pmode, hard_frame_pointer_rtx,
5957 INCOMING_FRAME_SP_OFFSET));
5959 return insn;
5962 /* Generate an increment for the stack pointer. */
5964 static rtx
5965 gen_stack_pointer_inc (rtx increment)
5967 return gen_rtx_SET (stack_pointer_rtx,
5968 gen_rtx_PLUS (Pmode,
5969 stack_pointer_rtx,
5970 increment));
5973 /* Expand the function prologue. The prologue is responsible for reserving
5974 storage for the frame, saving the call-saved registers and loading the
5975 GOT register if needed. */
5977 void
5978 sparc_expand_prologue (void)
5980 HOST_WIDE_INT size;
5981 rtx_insn *insn;
5983 /* Compute a snapshot of crtl->uses_only_leaf_regs. Relying
5984 on the final value of the flag means deferring the prologue/epilogue
5985 expansion until just before the second scheduling pass, which is too
5986 late to emit multiple epilogues or return insns.
5988 Of course we are making the assumption that the value of the flag
5989 will not change between now and its final value. Of the three parts
5990 of the formula, only the last one can reasonably vary. Let's take a
5991 closer look, after assuming that the first two are set to true
5992 (otherwise the last value is effectively silenced).
5994 If only_leaf_regs_used returns false, the global predicate will also
5995 be false so the actual frame size calculated below will be positive.
5996 As a consequence, the save_register_window insn will be emitted in
5997 the instruction stream; now this insn explicitly references %fp
5998 which is not a leaf register so only_leaf_regs_used will always
5999 return false subsequently.
6001 If only_leaf_regs_used returns true, we hope that the subsequent
6002 optimization passes won't cause non-leaf registers to pop up. For
6003 example, the regrename pass has special provisions to not rename to
6004 non-leaf registers in a leaf function. */
6005 sparc_leaf_function_p
6006 = optimize > 0 && crtl->is_leaf && only_leaf_regs_used ();
6008 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6010 if (flag_stack_usage_info)
6011 current_function_static_stack_size = size;
6013 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6014 || flag_stack_clash_protection)
6016 if (crtl->is_leaf && !cfun->calls_alloca)
6018 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6019 sparc_emit_probe_stack_range (get_stack_check_protect (),
6020 size - get_stack_check_protect ());
6022 else if (size > 0)
6023 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6026 if (size == 0)
6027 ; /* do nothing. */
6028 else if (sparc_leaf_function_p)
6030 rtx size_int_rtx = GEN_INT (-size);
6032 if (size <= 4096)
6033 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6034 else if (size <= 8192)
6036 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6037 RTX_FRAME_RELATED_P (insn) = 1;
6039 /* %sp is still the CFA register. */
6040 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6042 else
6044 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6045 emit_move_insn (size_rtx, size_int_rtx);
6046 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6047 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6048 gen_stack_pointer_inc (size_int_rtx));
6051 RTX_FRAME_RELATED_P (insn) = 1;
6053 /* Ensure no memory access is done before the frame is established. */
6054 emit_insn (gen_frame_blockage ());
6056 else
6058 rtx size_int_rtx = GEN_INT (-size);
6060 if (size <= 4096)
6061 emit_window_save (size_int_rtx);
6062 else if (size <= 8192)
6064 emit_window_save (GEN_INT (-4096));
6066 /* %sp is not the CFA register anymore. */
6067 emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6069 /* Likewise. */
6070 emit_insn (gen_frame_blockage ());
6072 else
6074 rtx size_rtx = gen_rtx_REG (Pmode, 1);
6075 emit_move_insn (size_rtx, size_int_rtx);
6076 emit_window_save (size_rtx);
6080 if (sparc_leaf_function_p)
6082 sparc_frame_base_reg = stack_pointer_rtx;
6083 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6085 else
6087 sparc_frame_base_reg = hard_frame_pointer_rtx;
6088 sparc_frame_base_offset = SPARC_STACK_BIAS;
6091 if (sparc_n_global_fp_regs > 0)
6092 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6093 sparc_frame_base_offset
6094 - sparc_apparent_frame_size,
6095 SORR_SAVE);
6097 /* Advertise that the data calculated just above are now valid. */
6098 sparc_prologue_data_valid_p = true;
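   /* To illustrate, for a frame of SIZE bytes the non-leaf path above
      emits roughly

	SIZE <= 4096:		save	%sp, -SIZE, %sp
	SIZE <= 8192:		save	%sp, -4096, %sp
				add	%sp, 4096-SIZE, %sp
	larger frames:		-SIZE loaded into %g1 first
				(e.g. a sethi/or pair), then
				save	%sp, %g1, %sp

      while the leaf path uses plain add instructions on %sp instead of a
      register window save.  */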
6101 /* Expand the function prologue. The prologue is responsible for reserving
6102 storage for the frame, saving the call-saved registers and loading the
6103 GOT register if needed. */
6105 void
6106 sparc_flat_expand_prologue (void)
6108 HOST_WIDE_INT size;
6109 rtx_insn *insn;
6111 sparc_leaf_function_p = optimize > 0 && crtl->is_leaf;
6113 size = sparc_compute_frame_size (get_frame_size(), sparc_leaf_function_p);
6115 if (flag_stack_usage_info)
6116 current_function_static_stack_size = size;
6118 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
6119 || flag_stack_clash_protection)
6121 if (crtl->is_leaf && !cfun->calls_alloca)
6123 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
6124 sparc_emit_probe_stack_range (get_stack_check_protect (),
6125 size - get_stack_check_protect ());
6127 else if (size > 0)
6128 sparc_emit_probe_stack_range (get_stack_check_protect (), size);
6131 if (sparc_save_local_in_regs_p)
6132 emit_save_or_restore_local_in_regs (stack_pointer_rtx, SPARC_STACK_BIAS,
6133 SORR_SAVE);
6135 if (size == 0)
6136 ; /* do nothing. */
6137 else
6139 rtx size_int_rtx, size_rtx;
6141 size_rtx = size_int_rtx = GEN_INT (-size);
6143 /* We establish the frame (i.e. decrement the stack pointer) first, even
6144 if we use a frame pointer, because we cannot clobber any call-saved
6145 registers, including the frame pointer, if we haven't created a new
6146 register save area, for the sake of compatibility with the ABI. */
6147 if (size <= 4096)
6148 insn = emit_insn (gen_stack_pointer_inc (size_int_rtx));
6149 else if (size <= 8192 && !frame_pointer_needed)
6151 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (-4096)));
6152 RTX_FRAME_RELATED_P (insn) = 1;
6153 insn = emit_insn (gen_stack_pointer_inc (GEN_INT (4096 - size)));
6155 else
6157 size_rtx = gen_rtx_REG (Pmode, 1);
6158 emit_move_insn (size_rtx, size_int_rtx);
6159 insn = emit_insn (gen_stack_pointer_inc (size_rtx));
6160 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6161 gen_stack_pointer_inc (size_int_rtx));
6163 RTX_FRAME_RELATED_P (insn) = 1;
6165 /* Ensure no memory access is done before the frame is established. */
6166 emit_insn (gen_frame_blockage ());
6168 if (frame_pointer_needed)
6170 insn = emit_insn (gen_rtx_SET (hard_frame_pointer_rtx,
6171 gen_rtx_MINUS (Pmode,
6172 stack_pointer_rtx,
6173 size_rtx)));
6174 RTX_FRAME_RELATED_P (insn) = 1;
6176 add_reg_note (insn, REG_CFA_ADJUST_CFA,
6177 gen_rtx_SET (hard_frame_pointer_rtx,
6178 plus_constant (Pmode, stack_pointer_rtx,
6179 size)));
6182 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6184 rtx o7 = gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM);
6185 rtx i7 = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
6187 insn = emit_move_insn (i7, o7);
6188 RTX_FRAME_RELATED_P (insn) = 1;
6190 add_reg_note (insn, REG_CFA_REGISTER, gen_rtx_SET (i7, o7));
6192 /* Prevent this instruction from ever being considered dead,
6193 even if this function has no epilogue. */
6194 emit_use (i7);
6198 if (frame_pointer_needed)
6200 sparc_frame_base_reg = hard_frame_pointer_rtx;
6201 sparc_frame_base_offset = SPARC_STACK_BIAS;
6203 else
6205 sparc_frame_base_reg = stack_pointer_rtx;
6206 sparc_frame_base_offset = size + SPARC_STACK_BIAS;
6209 if (sparc_n_global_fp_regs > 0)
6210 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6211 sparc_frame_base_offset
6212 - sparc_apparent_frame_size,
6213 SORR_SAVE);
6215 /* Advertise that the data calculated just above are now valid. */
6216 sparc_prologue_data_valid_p = true;
6219 /* This function generates the assembly code for function entry, which boils
6220 down to emitting the necessary .register directives. */
6222 static void
6223 sparc_asm_function_prologue (FILE *file)
6225 /* Check that the assumption we made in sparc_expand_prologue is valid. */
6226 if (!TARGET_FLAT)
6227 gcc_assert (sparc_leaf_function_p == crtl->uses_only_leaf_regs);
6229 sparc_output_scratch_registers (file);
6232 /* Expand the function epilogue, either normal or part of a sibcall.
6233 We emit all the instructions except the return or the call. */
6235 void
6236 sparc_expand_epilogue (bool for_eh)
6238 HOST_WIDE_INT size = sparc_frame_size;
6240 if (cfun->calls_alloca)
6241 emit_insn (gen_frame_blockage ());
6243 if (sparc_n_global_fp_regs > 0)
6244 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6245 sparc_frame_base_offset
6246 - sparc_apparent_frame_size,
6247 SORR_RESTORE);
6249 if (size == 0 || for_eh)
6250 ; /* do nothing. */
6251 else if (sparc_leaf_function_p)
6253 /* Ensure no memory access is done after the frame is destroyed. */
6254 emit_insn (gen_frame_blockage ());
6256 if (size <= 4096)
6257 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6258 else if (size <= 8192)
6260 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6261 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6263 else
6265 rtx reg = gen_rtx_REG (Pmode, 1);
6266 emit_move_insn (reg, GEN_INT (size));
6267 emit_insn (gen_stack_pointer_inc (reg));
6272 /* Expand the function epilogue, either normal or part of a sibcall.
6273 We emit all the instructions except the return or the call. */
6275 void
6276 sparc_flat_expand_epilogue (bool for_eh)
6278 HOST_WIDE_INT size = sparc_frame_size;
6280 if (sparc_n_global_fp_regs > 0)
6281 emit_save_or_restore_global_fp_regs (sparc_frame_base_reg,
6282 sparc_frame_base_offset
6283 - sparc_apparent_frame_size,
6284 SORR_RESTORE);
6286 /* If we have a frame pointer, we'll need both to restore it before the
6287 frame is destroyed and use its current value in destroying the frame.
6288 Since we don't have an atomic way to do that in the flat window model,
6289 we save the current value into a temporary register (%g1). */
6290 if (frame_pointer_needed && !for_eh)
6291 emit_move_insn (gen_rtx_REG (Pmode, 1), hard_frame_pointer_rtx);
6293 if (return_addr_reg_needed_p (sparc_leaf_function_p))
6294 emit_move_insn (gen_rtx_REG (Pmode, INCOMING_RETURN_ADDR_REGNUM),
6295 gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM));
6297 if (sparc_save_local_in_regs_p)
6298 emit_save_or_restore_local_in_regs (sparc_frame_base_reg,
6299 sparc_frame_base_offset,
6300 SORR_RESTORE);
6302 if (size == 0 || for_eh)
6303 ; /* do nothing. */
6304 else if (frame_pointer_needed)
6306 /* Ensure no memory access is done after the frame is destroyed. */
6307 emit_insn (gen_frame_blockage ());
6309 emit_move_insn (stack_pointer_rtx, gen_rtx_REG (Pmode, 1));
6311 else
6313 /* Likewise. */
6314 emit_insn (gen_frame_blockage ());
6316 if (size <= 4096)
6317 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
6318 else if (size <= 8192)
6320 emit_insn (gen_stack_pointer_inc (GEN_INT (4096)));
6321 emit_insn (gen_stack_pointer_inc (GEN_INT (size - 4096)));
6323 else
6325 rtx reg = gen_rtx_REG (Pmode, 1);
6326 emit_move_insn (reg, GEN_INT (size));
6327 emit_insn (gen_stack_pointer_inc (reg));
6332 /* Return true if it is appropriate to emit `return' instructions in the
6333 body of a function. */
6335 bool
6336 sparc_can_use_return_insn_p (void)
6338 return sparc_prologue_data_valid_p
6339 && sparc_n_global_fp_regs == 0
6340 && TARGET_FLAT
6341 ? (sparc_frame_size == 0 && !sparc_save_local_in_regs_p)
6342 : (sparc_frame_size == 0 || !sparc_leaf_function_p);
6345 /* This function generates the assembly code for function exit. */
6347 static void
6348 sparc_asm_function_epilogue (FILE *file)
6350 /* If the last two instructions of a function are "call foo; dslot;"
6351 the return address might point to the first instruction in the next
6352 function and we have to output a dummy nop for the sake of sane
6353 backtraces in such cases. This is pointless for sibling calls since
6354 the return address is explicitly adjusted. */
6356 rtx_insn *insn = get_last_insn ();
6358 rtx last_real_insn = prev_real_insn (insn);
6359 if (last_real_insn
6360 && NONJUMP_INSN_P (last_real_insn)
6361 && GET_CODE (PATTERN (last_real_insn)) == SEQUENCE)
6362 last_real_insn = XVECEXP (PATTERN (last_real_insn), 0, 0);
6364 if (last_real_insn
6365 && CALL_P (last_real_insn)
6366 && !SIBLING_CALL_P (last_real_insn))
6367 fputs("\tnop\n", file);
6369 sparc_output_deferred_case_vectors ();
6372 /* Output a 'restore' instruction. */
6374 static void
6375 output_restore (rtx pat)
6377 rtx operands[3];
6379 if (! pat)
6381 fputs ("\t restore\n", asm_out_file);
6382 return;
6385 gcc_assert (GET_CODE (pat) == SET);
6387 operands[0] = SET_DEST (pat);
6388 pat = SET_SRC (pat);
6390 switch (GET_CODE (pat))
6392 case PLUS:
6393 operands[1] = XEXP (pat, 0);
6394 operands[2] = XEXP (pat, 1);
6395 output_asm_insn (" restore %r1, %2, %Y0", operands);
6396 break;
6397 case LO_SUM:
6398 operands[1] = XEXP (pat, 0);
6399 operands[2] = XEXP (pat, 1);
6400 output_asm_insn (" restore %r1, %%lo(%a2), %Y0", operands);
6401 break;
6402 case ASHIFT:
6403 operands[1] = XEXP (pat, 0);
6404 gcc_assert (XEXP (pat, 1) == const1_rtx);
6405 output_asm_insn (" restore %r1, %r1, %Y0", operands);
6406 break;
6407 default:
6408 operands[1] = pat;
6409 output_asm_insn (" restore %%g0, %1, %Y0", operands);
6410 break;
6414 /* Output a return. */
6416 const char *
6417 output_return (rtx_insn *insn)
6419 if (crtl->calls_eh_return)
6421 /* If the function uses __builtin_eh_return, the eh_return
6422 machinery occupies the delay slot. */
6423 gcc_assert (!final_sequence);
6425 if (flag_delayed_branch)
6427 if (!TARGET_FLAT && TARGET_V9)
6428 fputs ("\treturn\t%i7+8\n", asm_out_file);
6429 else
6431 if (!TARGET_FLAT)
6432 fputs ("\trestore\n", asm_out_file);
6434 fputs ("\tjmp\t%o7+8\n", asm_out_file);
6437 fputs ("\t add\t%sp, %g1, %sp\n", asm_out_file);
6439 else
6441 if (!TARGET_FLAT)
6442 fputs ("\trestore\n", asm_out_file);
6444 fputs ("\tadd\t%sp, %g1, %sp\n", asm_out_file);
6445 fputs ("\tjmp\t%o7+8\n\t nop\n", asm_out_file);
6448 else if (sparc_leaf_function_p || TARGET_FLAT)
6450 /* This is a leaf or flat function so we don't have to bother restoring
6451 the register window, which frees us from dealing with the convoluted
6452 semantics of restore/return. We simply output the jump to the
6453 return address and the insn in the delay slot (if any). */
6455 return "jmp\t%%o7+%)%#";
6457 else
6459 /* This is a regular function so we have to restore the register window.
6460 We may have a pending insn for the delay slot, which will be either
6461 combined with the 'restore' instruction or put in the delay slot of
6462 the 'return' instruction. */
6464 if (final_sequence)
6466 rtx_insn *delay;
6467 rtx pat;
6469 delay = NEXT_INSN (insn);
6470 gcc_assert (delay);
6472 pat = PATTERN (delay);
6474 if (TARGET_V9 && ! epilogue_renumber (&pat, 1))
6476 epilogue_renumber (&pat, 0);
6477 return "return\t%%i7+%)%#";
6479 else
6481 output_asm_insn ("jmp\t%%i7+%)", NULL);
6483 /* We're going to output the insn in the delay slot manually.
6484 Make sure to output its source location first. */
6485 PATTERN (delay) = gen_blockage ();
6486 INSN_CODE (delay) = -1;
6487 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6488 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6490 output_restore (pat);
6493 else
6495 /* The delay slot is empty. */
6496 if (TARGET_V9)
6497 return "return\t%%i7+%)\n\t nop";
6498 else if (flag_delayed_branch)
6499 return "jmp\t%%i7+%)\n\t restore";
6500 else
6501 return "restore\n\tjmp\t%%o7+%)\n\t nop";
6505 return "";
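   /* By way of example, the leaf/flat path above produces

	jmp	%o7+8
	 nop

      in the common case (the %) operand prints the return offset,
      normally 8), while a regular V9 function with an empty delay slot
      produces

	return	%i7+8
	 nop

      letting the return instruction restore the register window itself.  */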
6508 /* Output a sibling call. */
6510 const char *
6511 output_sibcall (rtx_insn *insn, rtx call_operand)
6513 rtx operands[1];
6515 gcc_assert (flag_delayed_branch);
6517 operands[0] = call_operand;
6519 if (sparc_leaf_function_p || TARGET_FLAT)
6521 /* This is a leaf or flat function so we don't have to bother restoring
6522 the register window. We simply output the jump to the function and
6523 the insn in the delay slot (if any). */
6525 gcc_assert (!(LEAF_SIBCALL_SLOT_RESERVED_P && final_sequence));
6527 if (final_sequence)
6528 output_asm_insn ("sethi\t%%hi(%a0), %%g1\n\tjmp\t%%g1 + %%lo(%a0)%#",
6529 operands);
6530 else
6531 /* Use or with rs2 %%g0 instead of mov, so that as/ld can optimize
6532 it into a branch if possible. */
6533 output_asm_insn ("or\t%%o7, %%g0, %%g1\n\tcall\t%a0, 0\n\t or\t%%g1, %%g0, %%o7",
6534 operands);
6536 else
6538 /* This is a regular function so we have to restore the register window.
6539 We may have a pending insn for the delay slot, which will be combined
6540 with the 'restore' instruction. */
6542 output_asm_insn ("call\t%a0, 0", operands);
6544 if (final_sequence)
6546 rtx_insn *delay;
6547 rtx pat;
6549 delay = NEXT_INSN (insn);
6550 gcc_assert (delay);
6552 pat = PATTERN (delay);
6554 /* We're going to output the insn in the delay slot manually.
6555 Make sure to output its source location first. */
6556 PATTERN (delay) = gen_blockage ();
6557 INSN_CODE (delay) = -1;
6558 final_scan_insn (delay, asm_out_file, optimize, 0, NULL);
6559 INSN_LOCATION (delay) = UNKNOWN_LOCATION;
6561 output_restore (pat);
6563 else
6564 output_restore (NULL_RTX);
6567 return "";
6570 /* Functions for handling argument passing.
6572 For 32-bit, the first 6 args are normally in registers and the rest are
6573 pushed. Any arg that starts within the first 6 words is at least
6574 partially passed in a register unless its data type forbids it.
6576 For 64-bit, the argument registers are laid out as an array of 16 elements
6577 and arguments are added sequentially. The first 6 int args and up to the
6578 first 16 fp args (depending on size) are passed in regs.
6580   Slot  Stack     Integral  Float  Float in structure  Double  Long Double
6581   ----  -----     --------  -----  ------------------  ------  -----------
6582    15   [SP+248]            %f31   %f30,%f31           %d30
6583    14   [SP+240]            %f29   %f28,%f29           %d28    %q28
6584    13   [SP+232]            %f27   %f26,%f27           %d26
6585    12   [SP+224]            %f25   %f24,%f25           %d24    %q24
6586    11   [SP+216]            %f23   %f22,%f23           %d22
6587    10   [SP+208]            %f21   %f20,%f21           %d20    %q20
6588     9   [SP+200]            %f19   %f18,%f19           %d18
6589     8   [SP+192]            %f17   %f16,%f17           %d16    %q16
6590     7   [SP+184]            %f15   %f14,%f15           %d14
6591     6   [SP+176]            %f13   %f12,%f13           %d12    %q12
6592     5   [SP+168]  %o5       %f11   %f10,%f11           %d10
6593     4   [SP+160]  %o4       %f9    %f8,%f9             %d8     %q8
6594     3   [SP+152]  %o3       %f7    %f6,%f7             %d6
6595     2   [SP+144]  %o2       %f5    %f4,%f5             %d4     %q4
6596     1   [SP+136]  %o1       %f3    %f2,%f3             %d2
6597     0   [SP+128]  %o0       %f1    %f0,%f1             %d0     %q0
6599 Here SP = %sp if -mno-stack-bias, %sp+stack_bias otherwise.
6601 Integral arguments are always passed as 64-bit quantities appropriately
6602 extended.
6604 Passing of floating point values is handled as follows.
6605 If a prototype is in scope:
6606 If the value is in a named argument (i.e. not a stdarg function or a
6607 value not part of the `...') then the value is passed in the appropriate
6608 fp reg.
6609 If the value is part of the `...' and is passed in one of the first 6
6610 slots then the value is passed in the appropriate int reg.
6611 If the value is part of the `...' and is not passed in one of the first 6
6612 slots then the value is passed in memory.
6613 If a prototype is not in scope:
6614 If the value is one of the first 6 arguments the value is passed in the
6615 appropriate integer reg and the appropriate fp reg.
6616 If the value is not one of the first 6 arguments the value is passed in
6617 the appropriate fp reg and in memory.
6620 Summary of the calling conventions implemented by GCC on the SPARC:
6622 32-bit ABI:
6623                             size       argument   return value
6625    small integer            <4         int. reg.  int. reg.
6626    word                      4         int. reg.  int. reg.
6627    double word               8         int. reg.  int. reg.
6629    _Complex small integer   <8         int. reg.  int. reg.
6630    _Complex word             8         int. reg.  int. reg.
6631    _Complex double word     16         memory     int. reg.
6633    vector integer           <=8        int. reg.  FP reg.
6634    vector integer           >8         memory     memory
6636    float                     4         int. reg.  FP reg.
6637    double                    8         int. reg.  FP reg.
6638    long double              16         memory     memory
6640    _Complex float            8         memory     FP reg.
6641    _Complex double          16         memory     FP reg.
6642    _Complex long double     32         memory     FP reg.
6644    vector float             any        memory     memory
6646    aggregate                any        memory     memory
6650 64-bit ABI:
6651                             size       argument   return value
6653    small integer            <8         int. reg.  int. reg.
6654    word                      8         int. reg.  int. reg.
6655    double word              16         int. reg.  int. reg.
6657    _Complex small integer   <16        int. reg.  int. reg.
6658    _Complex word            16         int. reg.  int. reg.
6659    _Complex double word     32         memory     int. reg.
6661    vector integer           <=16       FP reg.    FP reg.
6662    vector integer           16<s<=32   memory     FP reg.
6663    vector integer           >32        memory     memory
6665    float                     4         FP reg.    FP reg.
6666    double                    8         FP reg.    FP reg.
6667    long double              16         FP reg.    FP reg.
6669    _Complex float            8         FP reg.    FP reg.
6670    _Complex double          16         FP reg.    FP reg.
6671    _Complex long double     32         memory     FP reg.
6673    vector float             <=16       FP reg.    FP reg.
6674    vector float             16<s<=32   memory     FP reg.
6675    vector float             >32        memory     memory
6677    aggregate                <=16       reg.       reg.
6678    aggregate                16<s<=32   memory     reg.
6679    aggregate                >32        memory     memory
6683 Note #1: complex floating-point types follow the extended SPARC ABIs as
6684 implemented by the Sun compiler.
6686 Note #2: integer vector types follow the scalar floating-point types
6687 conventions to match what is implemented by the Sun VIS SDK.
6689 Note #3: floating-point vector types follow the aggregate types
6690 conventions. */
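   /* As a concrete reading of the 64-bit parameter array above, a
      prototyped call

	void f (int i, float x, double d);

      passes i in %o0 (slot 0), x right-justified in %f3 (slot 1) and d
      in %d4 (slot 2), while the matching stack slots SP+128, SP+136 and
      SP+144 stay reserved for the callee.  */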
6693 /* Maximum number of int regs for args. */
6694 #define SPARC_INT_ARG_MAX 6
6695 /* Maximum number of fp regs for args. */
6696 #define SPARC_FP_ARG_MAX 16
6697 /* Number of words (partially) occupied for a given size in units. */
6698 #define CEIL_NWORDS(SIZE) CEIL((SIZE), UNITS_PER_WORD)
6700 /* Handle the INIT_CUMULATIVE_ARGS macro.
6701 Initialize a variable CUM of type CUMULATIVE_ARGS
6702 for a call to a function whose data type is FNTYPE.
6703 For a library call, FNTYPE is 0. */
6705 void
6706 init_cumulative_args (struct sparc_args *cum, tree fntype, rtx, tree)
6708 cum->words = 0;
6709 cum->prototype_p = fntype && prototype_p (fntype);
6710 cum->libcall_p = !fntype;
6713 /* Handle promotion of pointer and integer arguments. */
6715 static machine_mode
6716 sparc_promote_function_mode (const_tree type, machine_mode mode,
6717 int *punsignedp, const_tree, int)
6719 if (type && POINTER_TYPE_P (type))
6721 *punsignedp = POINTERS_EXTEND_UNSIGNED;
6722 return Pmode;
6725 /* Integral arguments are passed as full words, as per the ABI. */
6726 if (GET_MODE_CLASS (mode) == MODE_INT
6727 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
6728 return word_mode;
6730 return mode;
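   /* For instance, given

	extern void g (short s, char *p);

      the 'short' is promoted to word_mode (SImode on sparc32, DImode on
      sparc64) and the pointer is extended to Pmode as an unsigned
      quantity, so every scalar argument occupies a full word slot.  */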
6733 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook. */
6735 static bool
6736 sparc_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
6738 return TARGET_ARCH64;
6741 /* Handle the TARGET_PASS_BY_REFERENCE target hook.
6742 Specify whether to pass the argument by reference. */
6744 static bool
6745 sparc_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6747 tree type = arg.type;
6748 machine_mode mode = arg.mode;
6749 if (TARGET_ARCH32)
6750 /* Original SPARC 32-bit ABI says that structures and unions,
6751 and quad-precision floats are passed by reference.
6752 All other base types are passed in registers.
6754 Extended ABI (as implemented by the Sun compiler) says that all
6755 complex floats are passed by reference. Pass complex integers
6756 in registers up to 8 bytes. More generally, enforce the 2-word
6757 cap for passing arguments in registers.
6759 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6760 vectors are passed like floats of the same size, that is in
6761 registers up to 8 bytes. Pass all vector floats by reference
6762 like structure and unions. */
6763 return ((type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
6764 || mode == SCmode
6765 /* Catch CDImode, TFmode, DCmode and TCmode. */
6766 || GET_MODE_SIZE (mode) > 8
6767 || (type
6768 && VECTOR_TYPE_P (type)
6769 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
6770 else
6771 /* Original SPARC 64-bit ABI says that structures and unions
6772 smaller than 16 bytes are passed in registers, as well as
6773 all other base types.
6775 Extended ABI (as implemented by the Sun compiler) says that
6776 complex floats are passed in registers up to 16 bytes. Pass
6777 all complex integers in registers up to 16 bytes. More generally,
6778 enforce the 2-word cap for passing arguments in registers.
6780 Vector ABI (as implemented by the Sun VIS SDK) says that integer
6781 vectors are passed like floats of the same size, that is in
6782 registers (up to 16 bytes). Pass all vector floats like structure
6783 and unions. */
6784 return ((type
6785 && (AGGREGATE_TYPE_P (type) || VECTOR_TYPE_P (type))
6786 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 16)
6787 /* Catch CTImode and TCmode. */
6788 || GET_MODE_SIZE (mode) > 16);
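   /* Consequently, under the 32-bit ABI a 'long double' (TFmode), a
      '_Complex double' (DCmode) or any structure is passed by reference,
      while under the 64-bit ABI a 12-byte structure still travels in
      registers and only aggregates and vectors wider than 16 bytes (plus
      CTImode/TCmode scalars) go by reference.  */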
6791 /* Return true if TYPE is considered as a floating-point type by the ABI. */
6793 static bool
6794 fp_type_for_abi (const_tree type)
6796 /* This is the original GCC implementation. */
6797 if (FLOAT_TYPE_P (type) || VECTOR_TYPE_P (type))
6798 return true;
6800 /* This has been introduced in GCC 14 to match the vendor compiler. */
6801 if (SUN_V9_ABI_COMPATIBILITY && TREE_CODE (type) == ARRAY_TYPE)
6802 return fp_type_for_abi (TREE_TYPE (type));
6804 return false;
6807 /* Traverse the record TYPE recursively and call FUNC on its fields.
6808 NAMED is true if this is for a named parameter. DATA is passed
6809 to FUNC for each field. OFFSET is the starting position and
6810 PACKED is true if we are inside a packed record. */
6812 template <typename T, void Func (const_tree, int, bool, T*)>
6813 static void
6814 traverse_record_type (const_tree type, bool named, T *data,
6815 int offset = 0, bool packed = false)
6817 /* The ABI obviously doesn't specify how packed structures are passed.
6818 These are passed in integer regs if possible, otherwise memory. */
6819 if (!packed)
6820 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6821 if (TREE_CODE (field) == FIELD_DECL && DECL_PACKED (field))
6823 packed = true;
6824 break;
6827 /* Walk the real fields, but skip those with no size or a zero size.
6828 ??? Fields with variable offset are handled as having zero offset. */
6829 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6830 if (TREE_CODE (field) == FIELD_DECL)
6832 if (!DECL_SIZE (field) || integer_zerop (DECL_SIZE (field)))
6833 continue;
6835 int bitpos = offset;
6836 if (TREE_CODE (DECL_FIELD_OFFSET (field)) == INTEGER_CST)
6837 bitpos += int_bit_position (field);
6839 tree field_type = TREE_TYPE (field);
6840 if (TREE_CODE (field_type) == RECORD_TYPE)
6841 traverse_record_type<T, Func> (field_type, named, data, bitpos,
6842 packed);
6843 else
6845 const bool fp_type = fp_type_for_abi (field_type);
6846 Func (field, bitpos, fp_type && named && !packed && TARGET_FPU,
6847 data);
6852 /* Handle recursive register classifying for structure layout. */
6854 typedef struct
6856 bool fp_regs; /* true if field eligible to FP registers. */
6857 bool fp_regs_in_first_word; /* true if such field in first word. */
6858 } classify_data_t;
6860 /* A subroutine of function_arg_slotno. Classify the field. */
6862 inline void
6863 classify_registers (const_tree, int bitpos, bool fp, classify_data_t *data)
6865 if (fp)
6867 data->fp_regs = true;
6868 if (bitpos < BITS_PER_WORD)
6869 data->fp_regs_in_first_word = true;
6873 /* Compute the slot number to pass an argument in.
6874 Return the slot number or -1 if passing on the stack.
6876 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6877 the preceding args and about the function being called.
6878 MODE is the argument's machine mode.
6879 TYPE is the data type of the argument (as a tree).
6880 This is null for libcalls where that information may
6881 not be available.
6882 NAMED is nonzero if this argument is a named parameter
6883 (otherwise it is an extra parameter matching an ellipsis).
6884 INCOMING is zero for FUNCTION_ARG, nonzero for FUNCTION_INCOMING_ARG.
6885 *PREGNO records the register number to use if scalar type.
6886 *PPADDING records the amount of padding needed in words. */
6888 static int
6889 function_arg_slotno (const struct sparc_args *cum, machine_mode mode,
6890 const_tree type, bool named, bool incoming,
6891 int *pregno, int *ppadding)
6893 const int regbase
6894 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
6895 int slotno = cum->words, regno;
6896 enum mode_class mclass = GET_MODE_CLASS (mode);
6898 /* Silence warnings in the callers. */
6899 *pregno = -1;
6900 *ppadding = -1;
6902 if (type && TREE_ADDRESSABLE (type))
6903 return -1;
6905 /* In 64-bit mode, objects requiring 16-byte alignment get it. */
6906 if (TARGET_ARCH64
6907 && (type ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode)) >= 128
6908 && (slotno & 1) != 0)
6910 slotno++;
6911 *ppadding = 1;
6913 else
6914 *ppadding = 0;
6916 /* Vector types deserve special treatment because they are polymorphic wrt
6917 their mode, depending upon whether VIS instructions are enabled. */
6918 if (type && VECTOR_TYPE_P (type))
6920 if (SCALAR_FLOAT_TYPE_P (TREE_TYPE (type)))
6922 /* The SPARC port defines no floating-point vector modes. */
6923 gcc_assert (mode == BLKmode);
6925 else
6927 /* Integer vector types should either have a vector
6928 mode or an integral mode, because we are guaranteed
6929 by pass_by_reference that their size is not greater
6930 than 16 bytes and TImode is 16 bytes wide. */
6931 gcc_assert (mode != BLKmode);
6933 /* Integer vectors are handled like floats as per
6934 the Sun VIS SDK. */
6935 mclass = MODE_FLOAT;
6939 switch (mclass)
6941 case MODE_FLOAT:
6942 case MODE_COMPLEX_FLOAT:
6943 case MODE_VECTOR_INT:
6944 if (TARGET_ARCH64 && TARGET_FPU && named)
6946 /* If all arg slots are filled, then must pass on stack. */
6947 if (slotno >= SPARC_FP_ARG_MAX)
6948 return -1;
6950 regno = SPARC_FP_ARG_FIRST + slotno * 2;
6951 /* Arguments filling only one single FP register are
6952 right-justified in the outer double FP register. */
6953 if (GET_MODE_SIZE (mode) <= 4)
6954 regno++;
6955 break;
6957 /* fallthrough */
6959 case MODE_INT:
6960 case MODE_COMPLEX_INT:
6961 /* If all arg slots are filled, then must pass on stack. */
6962 if (slotno >= SPARC_INT_ARG_MAX)
6963 return -1;
6965 regno = regbase + slotno;
6966 break;
6968 case MODE_RANDOM:
6969 /* MODE is VOIDmode when generating the actual call. */
6970 if (mode == VOIDmode)
6971 return -1;
6973 if (TARGET_64BIT && TARGET_FPU && named
6974 && type
6975 && (TREE_CODE (type) == RECORD_TYPE || VECTOR_TYPE_P (type)))
6977 /* If all arg slots are filled, then must pass on stack. */
6978 if (slotno >= SPARC_FP_ARG_MAX)
6979 return -1;
6981 if (TREE_CODE (type) == RECORD_TYPE)
6983 classify_data_t data = { false, false };
6984 traverse_record_type<classify_data_t, classify_registers>
6985 (type, named, &data);
6987 if (data.fp_regs)
6989 /* If all FP slots are filled except for the last one and
6990 there is no FP field in the first word, then must pass
6991 on stack. */
6992 if (slotno >= SPARC_FP_ARG_MAX - 1
6993 && !data.fp_regs_in_first_word)
6994 return -1;
6996 else
6998 /* If all int slots are filled, then must pass on stack. */
6999 if (slotno >= SPARC_INT_ARG_MAX)
7000 return -1;
7003 /* PREGNO isn't set since both int and FP regs can be used. */
7004 return slotno;
7007 regno = SPARC_FP_ARG_FIRST + slotno * 2;
7009 else
7011 /* If all arg slots are filled, then must pass on stack. */
7012 if (slotno >= SPARC_INT_ARG_MAX)
7013 return -1;
7015 regno = regbase + slotno;
7017 break;
7019 default :
7020 gcc_unreachable ();
7023 *pregno = regno;
7024 return slotno;
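   /* Worked example: for a prototyped sparc64 call f (double, double,
      double), the third argument sees CUM->words == 2, so SLOTNO is 2
      and *PREGNO is SPARC_FP_ARG_FIRST + 4, i.e. the pair starting at
      %f2+2 that holds %d4, in line with the parameter array pictured
      above.  */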
7027 /* Handle recursive register counting/assigning for structure layout. */
7029 typedef struct
7031 int slotno; /* slot number of the argument. */
7032 int regbase; /* regno of the base register. */
7033 int intoffset; /* offset of the first pending integer field. */
7034 int nregs; /* number of words passed in registers. */
7035 bool stack; /* true if part of the argument is on the stack. */
7036 rtx ret; /* return expression being built. */
7037 } assign_data_t;
7039 /* A subroutine of function_arg_record_value. Compute the number of integer
7040 registers to be assigned between PARMS->intoffset and BITPOS. Return
7041 true if at least one integer register is assigned or false otherwise. */
7043 static bool
7044 compute_int_layout (int bitpos, assign_data_t *data, int *pnregs)
7046 if (data->intoffset < 0)
7047 return false;
7049 const int intoffset = data->intoffset;
7050 data->intoffset = -1;
7052 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7053 const unsigned int startbit = ROUND_DOWN (intoffset, BITS_PER_WORD);
7054 const unsigned int endbit = ROUND_UP (bitpos, BITS_PER_WORD);
7055 int nregs = (endbit - startbit) / BITS_PER_WORD;
7057 if (nregs > 0 && nregs > SPARC_INT_ARG_MAX - this_slotno)
7059 nregs = SPARC_INT_ARG_MAX - this_slotno;
7061 /* We need to pass this field (partly) on the stack. */
7062 data->stack = 1;
7065 if (nregs <= 0)
7066 return false;
7068 *pnregs = nregs;
7069 return true;
7072 /* A subroutine of function_arg_record_value. Compute the number and the mode
7073 of the FP registers to be assigned for FIELD. Return true if at least one
7074 FP register is assigned or false otherwise. */
7076 static bool
7077 compute_fp_layout (const_tree field, int bitpos, assign_data_t *data,
7078 int *pnregs, machine_mode *pmode)
7080 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7081 machine_mode mode = DECL_MODE (field);
7082 int nregs, nslots;
7084 /* Slots are counted as words while regs are counted as having the size of
7085 the (inner) mode. */
7086 if (VECTOR_TYPE_P (TREE_TYPE (field)) && mode == BLKmode)
7088 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7089 nregs = TYPE_VECTOR_SUBPARTS (TREE_TYPE (field));
7091 else if (TREE_CODE (TREE_TYPE (field)) == COMPLEX_TYPE)
7093 mode = TYPE_MODE (TREE_TYPE (TREE_TYPE (field)));
7094 nregs = 2;
7096 else if (TREE_CODE (TREE_TYPE (field)) == ARRAY_TYPE)
7098 tree elt_type = strip_array_types (TREE_TYPE (field));
7099 mode = TYPE_MODE (elt_type);
7100 nregs
7101 = int_size_in_bytes (TREE_TYPE (field)) / int_size_in_bytes (elt_type);
7103 else
7104 nregs = 1;
7106 nslots = CEIL_NWORDS (nregs * GET_MODE_SIZE (mode));
7108 if (nslots > SPARC_FP_ARG_MAX - this_slotno)
7110 nslots = SPARC_FP_ARG_MAX - this_slotno;
7111 nregs = (nslots * UNITS_PER_WORD) / GET_MODE_SIZE (mode);
7113 /* We need to pass this field (partly) on the stack. */
7114 data->stack = 1;
7116 if (nregs <= 0)
7117 return false;
7120 *pnregs = nregs;
7121 *pmode = mode;
7122 return true;
7125 /* A subroutine of function_arg_record_value. Count the number of registers
7126 to be assigned for FIELD and between PARMS->intoffset and BITPOS. */
7128 inline void
7129 count_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7131 if (fp)
7133 int nregs;
7134 machine_mode mode;
7136 if (compute_int_layout (bitpos, data, &nregs))
7137 data->nregs += nregs;
7139 if (compute_fp_layout (field, bitpos, data, &nregs, &mode))
7140 data->nregs += nregs;
7142 else
7144 if (data->intoffset < 0)
7145 data->intoffset = bitpos;
7149 /* A subroutine of function_arg_record_value. Assign the bits of the
7150 structure between PARMS->intoffset and BITPOS to integer registers. */
7152 static void
7153 assign_int_registers (int bitpos, assign_data_t *data)
7155 int intoffset = data->intoffset;
7156 machine_mode mode;
7157 int nregs;
7159 if (!compute_int_layout (bitpos, data, &nregs))
7160 return;
7162 /* If this is the trailing part of a word, only load that much into
7163 the register. Otherwise load the whole register. Note that in
7164 the latter case we may pick up unwanted bits. It's not a problem
7165 at the moment, but we may wish to revisit this. */
7166 if (intoffset % BITS_PER_WORD != 0)
7167 mode = smallest_int_mode_for_size (BITS_PER_WORD
7168 - intoffset % BITS_PER_WORD).require ();
7169 else
7170 mode = word_mode;
7172 const int this_slotno = data->slotno + intoffset / BITS_PER_WORD;
7173 unsigned int regno = data->regbase + this_slotno;
7174 intoffset /= BITS_PER_UNIT;
7176 do
7178 rtx reg = gen_rtx_REG (mode, regno);
7179 XVECEXP (data->ret, 0, data->stack + data->nregs)
7180 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (intoffset));
7181 data->nregs += 1;
7182 mode = word_mode;
7183 regno += 1;
7184 intoffset = (intoffset | (UNITS_PER_WORD - 1)) + 1;
7186 while (--nregs > 0);
7189 /* A subroutine of function_arg_record_value. Assign FIELD at position
7190 BITPOS to FP registers. */
7192 static void
7193 assign_fp_registers (const_tree field, int bitpos, assign_data_t *data)
7195 int nregs;
7196 machine_mode mode;
7198 if (!compute_fp_layout (field, bitpos, data, &nregs, &mode))
7199 return;
7201 const int this_slotno = data->slotno + bitpos / BITS_PER_WORD;
7202 int regno = SPARC_FP_ARG_FIRST + this_slotno * 2;
7203 if (GET_MODE_SIZE (mode) <= 4 && (bitpos & 32) != 0)
7204 regno++;
7205 int pos = bitpos / BITS_PER_UNIT;
7207 do
7209 rtx reg = gen_rtx_REG (mode, regno);
7210 XVECEXP (data->ret, 0, data->stack + data->nregs)
7211 = gen_rtx_EXPR_LIST (VOIDmode, reg, GEN_INT (pos));
7212 data->nregs += 1;
7213 regno += GET_MODE_SIZE (mode) / 4;
7214 pos += GET_MODE_SIZE (mode);
7216 while (--nregs > 0);
7219 /* A subroutine of function_arg_record_value. Assign FIELD and the bits of
7220 the structure between PARMS->intoffset and BITPOS to registers. */
7222 inline void
7223 assign_registers (const_tree field, int bitpos, bool fp, assign_data_t *data)
7225 if (fp)
7227 assign_int_registers (bitpos, data);
7229 assign_fp_registers (field, bitpos, data);
7231 else
7233 if (data->intoffset < 0)
7234 data->intoffset = bitpos;
7238 /* Used by function_arg and function_value to implement the complex
7239 conventions of the 64-bit ABI for passing and returning structures.
7240 Return an expression valid as a return value for the FUNCTION_ARG
7241 and TARGET_FUNCTION_VALUE.
7243 TYPE is the data type of the argument (as a tree).
7244 This is null for libcalls where that information may
7245 not be available.
7246 MODE is the argument's machine mode.
7247 SLOTNO is the index number of the argument's slot in the parameter array.
7248 NAMED is true if this argument is a named parameter
7249 (otherwise it is an extra parameter matching an ellipsis).
7250 REGBASE is the regno of the base register for the parameter array. */
7252 static rtx
7253 function_arg_record_value (const_tree type, machine_mode mode,
7254 int slotno, bool named, int regbase)
7256 const int size = int_size_in_bytes (type);
7257 assign_data_t data;
7258 int nregs;
7260 data.slotno = slotno;
7261 data.regbase = regbase;
7263 /* Count how many registers we need. */
7264 data.nregs = 0;
7265 data.intoffset = 0;
7266 data.stack = false;
7267 traverse_record_type<assign_data_t, count_registers> (type, named, &data);
7269 /* Take into account pending integer fields. */
7270 if (compute_int_layout (size * BITS_PER_UNIT, &data, &nregs))
7271 data.nregs += nregs;
7273 /* Allocate the vector and handle some annoying special cases. */
7274 nregs = data.nregs;
7276 if (nregs == 0)
7278 /* ??? Empty structure has no value? Duh? */
7279 if (size <= 0)
7281 /* Though there's nothing really to store, return a word register
7282 anyway so the rest of gcc doesn't go nuts. Returning a PARALLEL
7283 leads to breakage due to the fact that there are zero bytes to
7284 load. */
7285 return gen_rtx_REG (mode, regbase);
7288 /* ??? C++ has structures with no fields, and yet a size. Give up
7289 for now and pass everything back in integer registers. */
7290 nregs = CEIL_NWORDS (size);
7291 if (nregs + slotno > SPARC_INT_ARG_MAX)
7292 nregs = SPARC_INT_ARG_MAX - slotno;
7295 gcc_assert (nregs > 0);
7297 data.ret = gen_rtx_PARALLEL (mode, rtvec_alloc (data.stack + nregs));
7299 /* If at least one field must be passed on the stack, generate
7300 (parallel [(expr_list (nil) ...) ...]) so that all fields will
7301 also be passed on the stack. We can't do much better because the
7302 semantics of TARGET_ARG_PARTIAL_BYTES doesn't handle the case
7303 of structures for which the fields passed exclusively in registers
7304 are not at the beginning of the structure. */
7305 if (data.stack)
7306 XVECEXP (data.ret, 0, 0)
7307 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7309 /* Assign the registers. */
7310 data.nregs = 0;
7311 data.intoffset = 0;
7312 traverse_record_type<assign_data_t, assign_registers> (type, named, &data);
7314 /* Assign pending integer fields. */
7315 assign_int_registers (size * BITS_PER_UNIT, &data);
7317 gcc_assert (data.nregs == nregs);
7319 return data.ret;
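   /* Illustration: for 'struct { int i; double d; }' (16 bytes) passed
      as the first named argument on sparc64, the traversal above assigns
      the integer field to the first int reg and the FP field to the
      second slot, yielding roughly

	(parallel [(expr_list (reg:DI %o0) (const_int 0))
		   (expr_list (reg:DF %f2) (const_int 8))])

      i.e. i in %o0 and d in %d2.  */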
7322 /* Used by function_arg and function_value to implement the conventions
7323 of the 64-bit ABI for passing and returning unions.
7324 Return an expression valid as a return value for the FUNCTION_ARG
7325 and TARGET_FUNCTION_VALUE.
7327 SIZE is the size in bytes of the union.
7328 MODE is the argument's machine mode.
7329 SLOTNO is the index number of the argument's slot in the parameter array.
7330 REGNO is the hard register the union will be passed in. */
7332 static rtx
7333 function_arg_union_value (int size, machine_mode mode, int slotno, int regno)
7335 unsigned int nwords;
7337 /* See comment in function_arg_record_value for empty structures. */
7338 if (size <= 0)
7339 return gen_rtx_REG (mode, regno);
7341 if (slotno == SPARC_INT_ARG_MAX - 1)
7342 nwords = 1;
7343 else
7344 nwords = CEIL_NWORDS (size);
7346 rtx regs = gen_rtx_PARALLEL (mode, rtvec_alloc (nwords));
7348 /* Unions are passed left-justified. */
7349 for (unsigned int i = 0; i < nwords; i++)
7350 XVECEXP (regs, 0, i)
7351 = gen_rtx_EXPR_LIST (VOIDmode,
7352 gen_rtx_REG (word_mode, regno + i),
7353 GEN_INT (UNITS_PER_WORD * i));
7355 return regs;
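   /* E.g. a 12-byte union starting in slot 0 comes back as
      (parallel [(reg:DI %o0) at offset 0, (reg:DI %o1) at offset 8]),
      left-justified in two word registers, assuming the outgoing
      register base.  */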
7358 /* Used by function_arg and function_value to implement the conventions
7359 of the 64-bit ABI for passing and returning BLKmode vectors.
7360 Return an expression valid as a return value for the FUNCTION_ARG
7361 and TARGET_FUNCTION_VALUE.
7363 SIZE is the size in bytes of the vector.
7364 SLOTNO is the index number of the argument's slot in the parameter array.
7365 NAMED is true if this argument is a named parameter
7366 (otherwise it is an extra parameter matching an ellipsis).
7367 REGNO is the hard register the vector will be passed in. */
7369 static rtx
7370 function_arg_vector_value (int size, int slotno, bool named, int regno)
7372 const int mult = (named ? 2 : 1);
7373 unsigned int nwords;
7375 if (slotno == (named ? SPARC_FP_ARG_MAX : SPARC_INT_ARG_MAX) - 1)
7376 nwords = 1;
7377 else
7378 nwords = CEIL_NWORDS (size);
7380 rtx regs = gen_rtx_PARALLEL (BLKmode, rtvec_alloc (nwords));
7382 if (size < UNITS_PER_WORD)
7383 XVECEXP (regs, 0, 0)
7384 = gen_rtx_EXPR_LIST (VOIDmode,
7385 gen_rtx_REG (SImode, regno),
7386 const0_rtx);
7387 else
7388 for (unsigned int i = 0; i < nwords; i++)
7389 XVECEXP (regs, 0, i)
7390 = gen_rtx_EXPR_LIST (VOIDmode,
7391 gen_rtx_REG (word_mode, regno + i * mult),
7392 GEN_INT (i * UNITS_PER_WORD));
7394 return regs;
7397 /* Determine where to put an argument to a function.
7398 Value is zero to push the argument on the stack,
7399 or a hard register in which to store the argument.
7401 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7402 the preceding args and about the function being called.
7403 ARG is a description of the argument.
7404 INCOMING_P is false for TARGET_FUNCTION_ARG, true for
7405 TARGET_FUNCTION_INCOMING_ARG. */
7407 static rtx
7408 sparc_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
7409 bool incoming)
7411 const CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7412 const int regbase
7413 = incoming ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7414 int slotno, regno, padding;
7415 tree type = arg.type;
7416 machine_mode mode = arg.mode;
7417 enum mode_class mclass = GET_MODE_CLASS (mode);
7418 bool named = arg.named;
7420 slotno
7421 = function_arg_slotno (cum, mode, type, named, incoming, &regno, &padding);
7422 if (slotno == -1)
7423 return 0;
7425 /* Integer vectors are handled like floats as per the Sun VIS SDK. */
7426 if (type && VECTOR_INTEGER_TYPE_P (type))
7427 mclass = MODE_FLOAT;
7429 if (TARGET_ARCH32)
7430 return gen_rtx_REG (mode, regno);
7432 /* Structures up to 16 bytes in size are passed in arg slots on the stack
7433 and are promoted to registers if possible. */
7434 if (type && TREE_CODE (type) == RECORD_TYPE)
7436 const int size = int_size_in_bytes (type);
7437 gcc_assert (size <= 16);
7439 return function_arg_record_value (type, mode, slotno, named, regbase);
7442 /* Unions up to 16 bytes in size are passed in integer registers. */
7443 else if (type && TREE_CODE (type) == UNION_TYPE)
7445 const int size = int_size_in_bytes (type);
7446 gcc_assert (size <= 16);
7448 return function_arg_union_value (size, mode, slotno, regno);
7451 /* Floating-point vectors up to 16 bytes are passed in registers. */
7452 else if (type && VECTOR_TYPE_P (type) && mode == BLKmode)
7454 const int size = int_size_in_bytes (type);
7455 gcc_assert (size <= 16);
7457 return function_arg_vector_value (size, slotno, named, regno);
7460 /* v9 fp args in reg slots beyond the int reg slots get passed in regs
7461 but also have the slot allocated for them.
7462 If no prototype is in scope fp values in register slots get passed
7463 in two places, either fp regs and int regs or fp regs and memory. */
7464 else if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7465 && SPARC_FP_REG_P (regno))
7467 rtx reg = gen_rtx_REG (mode, regno);
7468 if (cum->prototype_p || cum->libcall_p)
7469 return reg;
7470 else
7472 rtx v0, v1;
7474 if ((regno - SPARC_FP_ARG_FIRST) < SPARC_INT_ARG_MAX * 2)
7476 int intreg;
7478 /* On incoming, we don't need to know that the value
7479 is passed in %f0 and %i0, and it confuses other parts
7480 causing needless spillage even on the simplest cases. */
7481 if (incoming)
7482 return reg;
7484 intreg = (SPARC_OUTGOING_INT_ARG_FIRST
7485 + (regno - SPARC_FP_ARG_FIRST) / 2);
7487 v0 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7488 v1 = gen_rtx_EXPR_LIST (VOIDmode, gen_rtx_REG (mode, intreg),
7489 const0_rtx);
7490 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7492 else
7494 v0 = gen_rtx_EXPR_LIST (VOIDmode, NULL_RTX, const0_rtx);
7495 v1 = gen_rtx_EXPR_LIST (VOIDmode, reg, const0_rtx);
7496 return gen_rtx_PARALLEL (mode, gen_rtvec (2, v0, v1));
7501 /* All other aggregate types are passed in an integer register in a mode
7502 corresponding to the size of the type. */
7503 else if (type && AGGREGATE_TYPE_P (type))
7505 const int size = int_size_in_bytes (type);
7506 gcc_assert (size <= 16);
7508 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7511 return gen_rtx_REG (mode, regno);
7514 /* Handle the TARGET_FUNCTION_ARG target hook. */
7516 static rtx
7517 sparc_function_arg (cumulative_args_t cum, const function_arg_info &arg)
7519 return sparc_function_arg_1 (cum, arg, false);
7522 /* Handle the TARGET_FUNCTION_INCOMING_ARG target hook. */
7524 static rtx
7525 sparc_function_incoming_arg (cumulative_args_t cum,
7526 const function_arg_info &arg)
7528 return sparc_function_arg_1 (cum, arg, true);
7531 /* For sparc64, objects requiring 16 byte alignment are passed that way. */
7533 static unsigned int
7534 sparc_function_arg_boundary (machine_mode mode, const_tree type)
7536 return ((TARGET_ARCH64
7537 && (GET_MODE_ALIGNMENT (mode) == 128
7538 || (type && TYPE_ALIGN (type) == 128)))
7539 ? 128
7540 : PARM_BOUNDARY);
7543 /* For an arg passed partly in registers and partly in memory,
7544 this is the number of bytes of registers used.
7545 For args passed entirely in registers or entirely in memory, zero.
7547 Any arg that starts in the first 6 regs but won't entirely fit in them
7548 needs partial registers on v8. On v9, structures with integer
7549 values in arg slots 5,6 will be passed in %o5 and SP+176, and complex fp
7550 values that begin in the last fp reg [where "last fp reg" varies with the
7551 mode] will be split between that reg and memory. */
7553 static int
7554 sparc_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
7556 int slotno, regno, padding;
7558 /* We pass false for incoming here; it doesn't matter. */
7559 slotno = function_arg_slotno (get_cumulative_args (cum), arg.mode, arg.type,
7560 arg.named, false, &regno, &padding);
7562 if (slotno == -1)
7563 return 0;
7565 if (TARGET_ARCH32)
7567 /* We are guaranteed by pass_by_reference that the size of the
7568 argument is not greater than 8 bytes, so we only need to return
7569 one word if the argument is partially passed in registers. */
7570 const int size = GET_MODE_SIZE (arg.mode);
7572 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7573 return UNITS_PER_WORD;
7575 else
7577 /* We are guaranteed by pass_by_reference that the size of the
7578 argument is not greater than 16 bytes, so we only need to return
7579 one word if the argument is partially passed in registers. */
7580 if (arg.aggregate_type_p ())
7582 const int size = int_size_in_bytes (arg.type);
7584 if (size > UNITS_PER_WORD
7585 && (slotno == SPARC_INT_ARG_MAX - 1
7586 || slotno == SPARC_FP_ARG_MAX - 1))
7587 return UNITS_PER_WORD;
7589 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_INT
7590 || ((GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7591 || (arg.type && VECTOR_TYPE_P (arg.type)))
7592 && !(TARGET_FPU && arg.named)))
7594 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7595 ? int_size_in_bytes (arg.type)
7596 : GET_MODE_SIZE (arg.mode);
7598 if (size > UNITS_PER_WORD && slotno == SPARC_INT_ARG_MAX - 1)
7599 return UNITS_PER_WORD;
7601 else if (GET_MODE_CLASS (arg.mode) == MODE_COMPLEX_FLOAT
7602 || (arg.type && VECTOR_TYPE_P (arg.type)))
7604 const int size = (arg.type && VECTOR_FLOAT_TYPE_P (arg.type))
7605 ? int_size_in_bytes (arg.type)
7606 : GET_MODE_SIZE (arg.mode);
7608 if (size > UNITS_PER_WORD && slotno == SPARC_FP_ARG_MAX - 1)
7609 return UNITS_PER_WORD;
7613 return 0;
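   /* Example: under the 32-bit ABI, a DImode argument whose first half
      lands in the last slot (%o5) yields UNITS_PER_WORD (4) here: four
      bytes go in the register and the remaining four on the stack.  */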
7616 /* Handle the TARGET_FUNCTION_ARG_ADVANCE hook.
7617 Update the data in CUM to advance over argument ARG. */
7619 static void
7620 sparc_function_arg_advance (cumulative_args_t cum_v,
7621 const function_arg_info &arg)
7623 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7624 tree type = arg.type;
7625 machine_mode mode = arg.mode;
7626 int regno, padding;
7628 /* We pass false for incoming here; it doesn't matter. */
7629 function_arg_slotno (cum, mode, type, arg.named, false, &regno, &padding);
7631 /* If argument requires leading padding, add it. */
7632 cum->words += padding;
7634 if (TARGET_ARCH32)
7635 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
7636 else
7638 /* For types that can have BLKmode, get the size from the type. */
7639 if (type && (AGGREGATE_TYPE_P (type) || VECTOR_FLOAT_TYPE_P (type)))
7641 const int size = int_size_in_bytes (type);
7643 /* See comment in function_arg_record_value for empty structures. */
7644 if (size <= 0)
7645 cum->words++;
7646 else
7647 cum->words += CEIL_NWORDS (size);
7649 else
7650 cum->words += CEIL_NWORDS (GET_MODE_SIZE (mode));
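/* Editor's illustration (not part of GCC): CEIL_NWORDS rounds a byte
   count up to whole argument words.  On the 64-bit ABI with
   UNITS_PER_WORD == 8, a 12-byte aggregate advances cum->words by
   CEIL_NWORDS (12) == 2, while an empty structure (size <= 0) still
   consumes one word, matching function_arg_record_value.  */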
7654 /* Implement TARGET_FUNCTION_ARG_PADDING. For the 64-bit ABI structs
7655 are always stored left shifted in their argument slot. */
7657 static pad_direction
7658 sparc_function_arg_padding (machine_mode mode, const_tree type)
7660 if (TARGET_ARCH64 && type && AGGREGATE_TYPE_P (type))
7661 return PAD_UPWARD;
7663 /* Fall back to the default. */
7664 return default_function_arg_padding (mode, type);
7667 /* Handle the TARGET_RETURN_IN_MEMORY target hook.
7668 Specify whether to return the return value in memory. */
7670 static bool
7671 sparc_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
7673 if (TARGET_ARCH32)
7674 /* Original SPARC 32-bit ABI says that structures and unions, and
7675 quad-precision floats are returned in memory. But note that the
7676 first part is implemented through -fpcc-struct-return being the
7677 default, so here we only implement -freg-struct-return instead.
7678 All other base types are returned in registers.
7680 Extended ABI (as implemented by the Sun compiler) says that
7681 all complex floats are returned in registers (8 FP registers
7682 at most for '_Complex long double'). Return all complex integers
7683 in registers (4 at most for '_Complex long long').
7685 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7686 integers are returned like floats of the same size, that is in
7687 registers up to 8 bytes and in memory otherwise. Return all
7688 vector floats in memory like structure and unions; note that
7689 they always have BLKmode like the latter. */
7690 return (TYPE_MODE (type) == BLKmode
7691 || TYPE_MODE (type) == TFmode
7692 || (TREE_CODE (type) == VECTOR_TYPE
7693 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 8));
7694 else
7695 /* Original SPARC 64-bit ABI says that structures and unions
7696 smaller than 32 bytes are returned in registers, as well as
7697 all other base types.
7699 Extended ABI (as implemented by the Sun compiler) says that all
7700 complex floats are returned in registers (8 FP registers at most
7701 for '_Complex long double'). Return all complex integers in
7702 registers (4 at most for '_Complex TItype').
7704 Vector ABI (as implemented by the Sun VIS SDK) says that vector
7705 integers are returned like floats of the same size, that is in
7706 registers. Return all vector floats like structure and unions;
7707 note that they always have BLKmode like the latter. */
7708 return (TYPE_MODE (type) == BLKmode
7709 && (unsigned HOST_WIDE_INT) int_size_in_bytes (type) > 32);
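/* Editor's worked examples (not part of GCC): under the rules above, a
   32-bit target returns a structure (BLKmode), a 'long double' (TFmode)
   and a 16-byte vector float in memory, but keeps '_Complex double' in
   registers; a 64-bit target sends only BLKmode objects larger than 32
   bytes to memory, so e.g. a 24-byte structure comes back in registers.  */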
7712 /* Handle the TARGET_STRUCT_VALUE target hook.
7713 Return where to find the structure return value address. */
7715 static rtx
7716 sparc_struct_value_rtx (tree fndecl, int incoming)
7718 if (TARGET_ARCH64)
7719 return NULL_RTX;
7720 else
7722 rtx mem;
7724 if (incoming)
7725 mem = gen_frame_mem (Pmode, plus_constant (Pmode, frame_pointer_rtx,
7726 STRUCT_VALUE_OFFSET));
7727 else
7728 mem = gen_frame_mem (Pmode, plus_constant (Pmode, stack_pointer_rtx,
7729 STRUCT_VALUE_OFFSET));
7731 /* Only follow the SPARC ABI for fixed-size structure returns.
7732 Variable size structure returns are handled per the normal
7733 procedures in GCC. This is enabled by -mstd-struct-return. */
7734 if (incoming == 2
7735 && sparc_std_struct_return
7736 && TYPE_SIZE_UNIT (TREE_TYPE (fndecl))
7737 && TREE_CODE (TYPE_SIZE_UNIT (TREE_TYPE (fndecl))) == INTEGER_CST)
7739 /* We must check and adjust the return address, as it is optional
7740 as to whether the return object is really provided. */
7741 rtx ret_reg = gen_rtx_REG (Pmode, RETURN_ADDR_REGNUM);
7742 rtx scratch = gen_reg_rtx (SImode);
7743 rtx_code_label *endlab = gen_label_rtx ();
7745 /* Calculate the return object size. */
7746 tree size = TYPE_SIZE_UNIT (TREE_TYPE (fndecl));
7747 rtx size_rtx = GEN_INT (TREE_INT_CST_LOW (size) & 0xfff);
7748 /* Construct a temporary return value. */
7749 rtx temp_val
7750 = assign_stack_local (Pmode, TREE_INT_CST_LOW (size), 0);
7752 /* Implement SPARC 32-bit psABI callee return struct checking:
7754 Fetch the instruction where we will return to and see if
7755 it's an unimp instruction (the most significant 10 bits
7756 will be zero). */
7757 emit_move_insn (scratch, gen_rtx_MEM (SImode,
7758 plus_constant (Pmode,
7759 ret_reg, 8)));
7760 /* Assume the size is valid and pre-adjust. */
7761 emit_insn (gen_add3_insn (ret_reg, ret_reg, GEN_INT (4)));
7762 emit_cmp_and_jump_insns (scratch, size_rtx, EQ, const0_rtx, SImode,
7763 0, endlab);
7764 emit_insn (gen_sub3_insn (ret_reg, ret_reg, GEN_INT (4)));
7765 /* Write the address of the memory pointed to by temp_val into
7766 the memory pointed to by mem. */
7767 emit_move_insn (mem, XEXP (temp_val, 0));
7768 emit_label (endlab);
7771 return mem;
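/* Editor's illustration (not part of GCC): the psABI check above matches
   a 32-bit caller sequence such as

	call	foo
	 nop
	unimp	12	! top 10 bits zero, low bits = struct size

   On entry, the callee loads the word at return address + 8; if it
   equals the expected size (an unimp insn with the right low 12 bits),
   the struct pointer passed at %sp+64 is valid and the pre-adjusted
   return address (+4) skips the unimp insn.  Otherwise the adjustment
   is undone and the address of a local temporary is substituted, so the
   callee never stores through a pointer the caller did not provide.  */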
7775 /* Handle TARGET_FUNCTION_VALUE, and TARGET_LIBCALL_VALUE target hook.
7776 For v9, function return values are subject to the same rules as arguments,
7777 except that up to 32 bytes may be returned in registers. */
7779 static rtx
7780 sparc_function_value_1 (const_tree type, machine_mode mode, bool outgoing)
7782 /* Beware that the two values are swapped here wrt function_arg. */
7783 const int regbase
7784 = outgoing ? SPARC_INCOMING_INT_ARG_FIRST : SPARC_OUTGOING_INT_ARG_FIRST;
7785 enum mode_class mclass = GET_MODE_CLASS (mode);
7786 int regno;
7788 /* Integer vectors are handled like floats as per the Sun VIS SDK.
7789 Note that integer vectors larger than 16 bytes have BLKmode so
7790 they need to be handled like floating-point vectors below. */
7791 if (type && VECTOR_INTEGER_TYPE_P (type) && mode != BLKmode)
7792 mclass = MODE_FLOAT;
7794 if (TARGET_ARCH64 && type)
7796 /* Structures up to 32 bytes in size are returned in registers. */
7797 if (TREE_CODE (type) == RECORD_TYPE)
7799 const int size = int_size_in_bytes (type);
7800 gcc_assert (size <= 32);
7802 return function_arg_record_value (type, mode, 0, true, regbase);
7805 /* Unions up to 32 bytes in size are returned in integer registers. */
7806 else if (TREE_CODE (type) == UNION_TYPE)
7808 const int size = int_size_in_bytes (type);
7809 gcc_assert (size <= 32);
7811 return function_arg_union_value (size, mode, 0, regbase);
7814 /* Vectors up to 32 bytes are returned in FP registers. */
7815 else if (VECTOR_TYPE_P (type) && mode == BLKmode)
7817 const int size = int_size_in_bytes (type);
7818 gcc_assert (size <= 32);
7820 return function_arg_vector_value (size, 0, true, SPARC_FP_ARG_FIRST);
7823 /* Objects that require it are returned in FP registers. */
7824 else if (mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT)
7827 /* All other aggregate types are returned in an integer register in a
7828 mode corresponding to the size of the type. */
7829 else if (AGGREGATE_TYPE_P (type))
7831 /* All other aggregate types are passed in an integer register
7832 in a mode corresponding to the size of the type. */
7833 const int size = int_size_in_bytes (type);
7834 gcc_assert (size <= 32);
7836 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).else_blk ();
7838 /* ??? We probably should have made the same ABI change in
7839 3.4.0 as the one we made for unions. The latter was
7840 required by the SCD though, while the former is not
7841 specified, so we favored compatibility and efficiency.
7843 Now we're stuck for aggregates larger than 16 bytes,
7844 because OImode vanished in the meantime. Let's not
7845 try to be unduly clever, and simply follow the ABI
7846 for unions in that case. */
7847 if (mode == BLKmode)
7848 return function_arg_union_value (size, mode, 0, regbase);
7849 else
7850 mclass = MODE_INT;
7853 /* We should only have pointer and integer types at this point. This
7854 must match sparc_promote_function_mode. */
7855 else if (mclass == MODE_INT && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7856 mode = word_mode;
7859 /* We should only have pointer and integer types at this point, except with
7860 -freg-struct-return. This must match sparc_promote_function_mode. */
7861 else if (TARGET_ARCH32
7862 && !(type && AGGREGATE_TYPE_P (type))
7863 && mclass == MODE_INT
7864 && GET_MODE_SIZE (mode) < UNITS_PER_WORD)
7865 mode = word_mode;
7867 if ((mclass == MODE_FLOAT || mclass == MODE_COMPLEX_FLOAT) && TARGET_FPU)
7868 regno = SPARC_FP_ARG_FIRST;
7869 else
7870 regno = regbase;
7872 return gen_rtx_REG (mode, regno);
7875 /* Handle TARGET_FUNCTION_VALUE.
7876 On the SPARC, the value is found in the first "output" register, but the
7877 called function leaves it in the first "input" register. */
7879 static rtx
7880 sparc_function_value (const_tree valtype,
7881 const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
7882 bool outgoing)
7884 return sparc_function_value_1 (valtype, TYPE_MODE (valtype), outgoing);
7887 /* Handle TARGET_LIBCALL_VALUE. */
7889 static rtx
7890 sparc_libcall_value (machine_mode mode,
7891 const_rtx fun ATTRIBUTE_UNUSED)
7893 return sparc_function_value_1 (NULL_TREE, mode, false);
7896 /* Handle FUNCTION_VALUE_REGNO_P.
7897 On the SPARC, the first "output" reg is used for integer values, and the
7898 first floating point register is used for floating point values. */
7900 static bool
7901 sparc_function_value_regno_p (const unsigned int regno)
7903 return (regno == 8 || (TARGET_FPU && regno == 32));
7906 /* Do what is necessary for `va_start'. We look at the current function
7907 to determine if stdarg or varargs is used and return the address of
7908 the first unnamed parameter. */
7910 static rtx
7911 sparc_builtin_saveregs (void)
7913 int first_reg = crtl->args.info.words;
7914 rtx address;
7915 int regno;
7917 for (regno = first_reg; regno < SPARC_INT_ARG_MAX; regno++)
7918 emit_move_insn (gen_rtx_MEM (word_mode,
7919 gen_rtx_PLUS (Pmode,
7920 frame_pointer_rtx,
7921 GEN_INT (FIRST_PARM_OFFSET (0)
7922 + (UNITS_PER_WORD
7923 * regno)))),
7924 gen_rtx_REG (word_mode,
7925 SPARC_INCOMING_INT_ARG_FIRST + regno));
7927 address = gen_rtx_PLUS (Pmode,
7928 frame_pointer_rtx,
7929 GEN_INT (FIRST_PARM_OFFSET (0)
7930 + UNITS_PER_WORD * first_reg));
7932 return address;
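/* Editor's illustration (not part of GCC): if the named parameters of a
   varargs function occupy N words, the loop above spills the remaining
   incoming argument registers into their reserved stack slots starting
   at %fp + FIRST_PARM_OFFSET (0) + N * UNITS_PER_WORD.  Named and
   unnamed arguments then form one contiguous array in memory, and the
   returned address (the first unnamed word) seeds va_start.  */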
7935 /* Implement `va_start' for stdarg. */
7937 static void
7938 sparc_va_start (tree valist, rtx nextarg)
7940 nextarg = expand_builtin_saveregs ();
7941 std_expand_builtin_va_start (valist, nextarg);
7944 /* Implement `va_arg' for stdarg. */
7946 static tree
7947 sparc_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
7948 gimple_seq *post_p)
7950 HOST_WIDE_INT size, rsize, align;
7951 tree addr, incr;
7952 bool indirect;
7953 tree ptrtype = build_pointer_type (type);
7955 if (pass_va_arg_by_reference (type))
7957 indirect = true;
7958 size = rsize = UNITS_PER_WORD;
7959 align = 0;
7961 else
7963 indirect = false;
7964 size = int_size_in_bytes (type);
7965 rsize = ROUND_UP (size, UNITS_PER_WORD);
7966 align = 0;
7968 if (TARGET_ARCH64)
7970 /* For SPARC64, objects requiring 16-byte alignment get it. */
7971 if (TYPE_ALIGN (type) >= 2 * (unsigned) BITS_PER_WORD)
7972 align = 2 * UNITS_PER_WORD;
7974 /* SPARC-V9 ABI states that structures up to 16 bytes in size
7975 are left-justified in their slots. */
7976 if (AGGREGATE_TYPE_P (type))
7978 if (size == 0)
7979 size = rsize = UNITS_PER_WORD;
7980 else
7981 size = rsize;
7986 incr = valist;
7987 if (align)
7989 incr = fold_build_pointer_plus_hwi (incr, align - 1);
7990 incr = fold_convert (sizetype, incr);
7991 incr = fold_build2 (BIT_AND_EXPR, sizetype, incr,
7992 size_int (-align));
7993 incr = fold_convert (ptr_type_node, incr);
7996 gimplify_expr (&incr, pre_p, post_p, is_gimple_val, fb_rvalue);
7997 addr = incr;
7999 if (BYTES_BIG_ENDIAN && size < rsize)
8000 addr = fold_build_pointer_plus_hwi (incr, rsize - size);
8002 if (indirect)
8004 addr = fold_convert (build_pointer_type (ptrtype), addr);
8005 addr = build_va_arg_indirect_ref (addr);
8008 /* If the address isn't aligned properly for the type, we need a temporary.
8009 FIXME: This is inefficient, usually we can do this in registers. */
8010 else if (align == 0 && TYPE_ALIGN (type) > BITS_PER_WORD)
8012 tree tmp = create_tmp_var (type, "va_arg_tmp");
8013 tree dest_addr = build_fold_addr_expr (tmp);
8014 tree copy = build_call_expr (builtin_decl_implicit (BUILT_IN_MEMCPY),
8015 3, dest_addr, addr, size_int (rsize));
8016 TREE_ADDRESSABLE (tmp) = 1;
8017 gimplify_and_add (copy, pre_p);
8018 addr = dest_addr;
8021 else
8022 addr = fold_convert (ptrtype, addr);
8024 incr = fold_build_pointer_plus_hwi (incr, rsize);
8025 gimplify_assign (valist, incr, post_p);
8027 return build_va_arg_indirect_ref (addr);
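/* Editor's worked example (not part of GCC): the alignment step above
   computes incr = (incr + align - 1) & -align; with align == 16 a
   va_list pointer of 0x7fef58 becomes 0x7fef60 before the 16-byte
   argument is fetched.  On this big-endian port, a value smaller than
   its slot is further offset by rsize - size so it is read from the
   least significant end of the slot.  */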
8030 /* Implement the TARGET_VECTOR_MODE_SUPPORTED_P target hook.
8031 Specify whether the vector mode is supported by the hardware. */
8033 static bool
8034 sparc_vector_mode_supported_p (machine_mode mode)
8036 return TARGET_VIS && VECTOR_MODE_P (mode) ? true : false;
8039 /* Implement the TARGET_VECTORIZE_PREFERRED_SIMD_MODE target hook. */
8041 static machine_mode
8042 sparc_preferred_simd_mode (scalar_mode mode)
8044 if (TARGET_VIS)
8045 switch (mode)
8047 case E_SImode:
8048 return V2SImode;
8049 case E_HImode:
8050 return V4HImode;
8051 case E_QImode:
8052 return V8QImode;
8054 default:;
8057 return word_mode;
8060 /* Implement TARGET_CAN_FOLLOW_JUMP. */
8062 static bool
8063 sparc_can_follow_jump (const rtx_insn *follower, const rtx_insn *followee)
8065 /* Do not fold unconditional jumps that have been created for crossing
8066 partition boundaries. */
8067 if (CROSSING_JUMP_P (followee) && !CROSSING_JUMP_P (follower))
8068 return false;
8070 return true;
8073 /* Return the string to output an unconditional branch to LABEL, which is
8074 the operand number of the label.
8076 DEST is the destination insn (i.e. the label), INSN is the source. */
8078 const char *
8079 output_ubranch (rtx dest, rtx_insn *insn)
8081 static char string[64];
8082 bool v9_form = false;
8083 int delta;
8084 char *p;
8086 /* Even if we are trying to use cbcond for this, evaluate
8087 whether we can use V9 branches as our backup plan. */
8088 delta = 5000000;
8089 if (!CROSSING_JUMP_P (insn) && INSN_ADDRESSES_SET_P ())
8090 delta = (INSN_ADDRESSES (INSN_UID (dest))
8091 - INSN_ADDRESSES (INSN_UID (insn)));
8093 /* Leave some instructions for "slop". */
8094 if (TARGET_V9 && delta >= -260000 && delta < 260000)
8095 v9_form = true;
8097 if (TARGET_CBCOND)
8099 bool emit_nop = emit_cbcond_nop (insn);
8100 bool far = false;
8101 const char *rval;
8103 if (delta < -500 || delta > 500)
8104 far = true;
8106 if (far)
8108 if (v9_form)
8109 rval = "ba,a,pt\t%%xcc, %l0";
8110 else
8111 rval = "b,a\t%l0";
8113 else
8115 if (emit_nop)
8116 rval = "cwbe\t%%g0, %%g0, %l0\n\tnop";
8117 else
8118 rval = "cwbe\t%%g0, %%g0, %l0";
8120 return rval;
8123 if (v9_form)
8124 strcpy (string, "ba%*,pt\t%%xcc, ");
8125 else
8126 strcpy (string, "b%*\t");
8128 p = strchr (string, '\0');
8129 *p++ = '%';
8130 *p++ = 'l';
8131 *p++ = '0';
8132 *p++ = '%';
8133 *p++ = '(';
8134 *p = '\0';
8136 return string;
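/* Editor's illustration (not part of GCC): for a nearby label this
   returns "b%*\t%l0%(" (or "ba%*,pt\t%%xcc, %l0%(" in V9 form), where
   %*, %( and %l0 are expanded later by sparc_print_operand; on cbcond
   targets a short unconditional jump can instead be the always-true
   compare-and-branch "cwbe\t%%g0, %%g0, %l0".  */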
8139 /* Return the string to output a conditional branch to LABEL, which is
8140 the operand number of the label. OP is the conditional expression.
8141 XEXP (OP, 0) is assumed to be a condition code register (integer or
8142 floating point) and its mode specifies what kind of comparison we made.
8144 DEST is the destination insn (i.e. the label), INSN is the source.
8146 REVERSED is nonzero if we should reverse the sense of the comparison.
8148 ANNUL is nonzero if we should generate an annulling branch. */
8150 const char *
8151 output_cbranch (rtx op, rtx dest, int label, int reversed, int annul,
8152 rtx_insn *insn)
8154 static char string[64];
8155 enum rtx_code code = GET_CODE (op);
8156 rtx cc_reg = XEXP (op, 0);
8157 machine_mode mode = GET_MODE (cc_reg);
8158 const char *labelno, *branch;
8159 int spaces = 8, far;
8160 char *p;
8162 /* v9 branches are limited to +-1MB. If it is too far away,
8163 change
8165 bne,pt %xcc, .LC30
8167 to
8169 be,pn %xcc, .+12
8170 nop
8171 ba .LC30
8173 and
8175 fbne,a,pn %fcc2, .LC29
8177 to
8179 fbe,pt %fcc2, .+16
8180 nop
8181 ba .LC29 */
8183 far = TARGET_V9 && (get_attr_length (insn) >= 3);
8184 if (reversed ^ far)
8186 /* Reversal of FP compares takes care -- an ordered compare
8187 becomes an unordered compare and vice versa. */
8188 if (mode == CCFPmode || mode == CCFPEmode)
8189 code = reverse_condition_maybe_unordered (code);
8190 else
8191 code = reverse_condition (code);
8194 /* Start by writing the branch condition. */
8195 if (mode == CCFPmode || mode == CCFPEmode)
8197 switch (code)
8199 case NE:
8200 branch = "fbne";
8201 break;
8202 case EQ:
8203 branch = "fbe";
8204 break;
8205 case GE:
8206 branch = "fbge";
8207 break;
8208 case GT:
8209 branch = "fbg";
8210 break;
8211 case LE:
8212 branch = "fble";
8213 break;
8214 case LT:
8215 branch = "fbl";
8216 break;
8217 case UNORDERED:
8218 branch = "fbu";
8219 break;
8220 case ORDERED:
8221 branch = "fbo";
8222 break;
8223 case UNGT:
8224 branch = "fbug";
8225 break;
8226 case UNLT:
8227 branch = "fbul";
8228 break;
8229 case UNEQ:
8230 branch = "fbue";
8231 break;
8232 case UNGE:
8233 branch = "fbuge";
8234 break;
8235 case UNLE:
8236 branch = "fbule";
8237 break;
8238 case LTGT:
8239 branch = "fblg";
8240 break;
8241 default:
8242 gcc_unreachable ();
8245 /* ??? !v9: FP branches cannot be preceded by another floating point
8246 insn. Because there is currently no concept of pre-delay slots,
8247 we can fix this only by always emitting a nop before a floating
8248 point branch. */
8250 string[0] = '\0';
8251 if (! TARGET_V9)
8252 strcpy (string, "nop\n\t");
8253 strcat (string, branch);
8255 else
8257 switch (code)
8259 case NE:
8260 if (mode == CCVmode || mode == CCXVmode)
8261 branch = "bvs";
8262 else
8263 branch = "bne";
8264 break;
8265 case EQ:
8266 if (mode == CCVmode || mode == CCXVmode)
8267 branch = "bvc";
8268 else
8269 branch = "be";
8270 break;
8271 case GE:
8272 if (mode == CCNZmode || mode == CCXNZmode)
8273 branch = "bpos";
8274 else
8275 branch = "bge";
8276 break;
8277 case GT:
8278 branch = "bg";
8279 break;
8280 case LE:
8281 branch = "ble";
8282 break;
8283 case LT:
8284 if (mode == CCNZmode || mode == CCXNZmode)
8285 branch = "bneg";
8286 else
8287 branch = "bl";
8288 break;
8289 case GEU:
8290 branch = "bgeu";
8291 break;
8292 case GTU:
8293 branch = "bgu";
8294 break;
8295 case LEU:
8296 branch = "bleu";
8297 break;
8298 case LTU:
8299 branch = "blu";
8300 break;
8301 default:
8302 gcc_unreachable ();
8304 strcpy (string, branch);
8306 spaces -= strlen (branch);
8307 p = strchr (string, '\0');
8309 /* Now add the annulling, the label, and a possible noop. */
8310 if (annul && ! far)
8312 strcpy (p, ",a");
8313 p += 2;
8314 spaces -= 2;
8317 if (TARGET_V9)
8319 rtx note;
8320 int v8 = 0;
8322 if (! far && insn && INSN_ADDRESSES_SET_P ())
8324 int delta = (INSN_ADDRESSES (INSN_UID (dest))
8325 - INSN_ADDRESSES (INSN_UID (insn)));
8326 /* Leave some instructions for "slop". */
8327 if (delta < -260000 || delta >= 260000)
8328 v8 = 1;
8331 switch (mode)
8333 case E_CCmode:
8334 case E_CCNZmode:
8335 case E_CCCmode:
8336 case E_CCVmode:
8337 labelno = "%%icc, ";
8338 if (v8)
8339 labelno = "";
8340 break;
8341 case E_CCXmode:
8342 case E_CCXNZmode:
8343 case E_CCXCmode:
8344 case E_CCXVmode:
8345 labelno = "%%xcc, ";
8346 gcc_assert (!v8);
8347 break;
8348 case E_CCFPmode:
8349 case E_CCFPEmode:
8351 static char v9_fcc_labelno[] = "%%fccX, ";
8352 /* Set the char indicating the number of the fcc reg to use. */
8353 v9_fcc_labelno[5] = REGNO (cc_reg) - SPARC_FIRST_V9_FCC_REG + '0';
8354 labelno = v9_fcc_labelno;
8355 if (v8)
8357 gcc_assert (REGNO (cc_reg) == SPARC_FCC_REG);
8358 labelno = "";
8361 break;
8362 default:
8363 gcc_unreachable ();
8366 if (*labelno && insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8368 strcpy (p,
8369 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8370 >= profile_probability::even ()) ^ far)
8371 ? ",pt" : ",pn");
8372 p += 3;
8373 spaces -= 3;
8376 else
8377 labelno = "";
8379 if (spaces > 0)
8380 *p++ = '\t';
8381 else
8382 *p++ = ' ';
8383 strcpy (p, labelno);
8384 p = strchr (p, '\0');
8385 if (far)
8387 strcpy (p, ".+12\n\t nop\n\tb\t");
8388 /* Skip the next insn if requested or
8389 if we know that it will be a nop. */
8390 if (annul || ! final_sequence)
8391 p[3] = '6';
8392 p += 14;
8394 *p++ = '%';
8395 *p++ = 'l';
8396 *p++ = label + '0';
8397 *p++ = '%';
8398 *p++ = '#';
8399 *p = '\0';
8401 return string;
8404 /* Emit a library call comparison between floating point X and Y.
8405 COMPARISON is the operator to compare with (EQ, NE, GT, etc).
8406 Return the new operator to be used in the comparison sequence.
8408 TARGET_ARCH64 uses _Qp_* functions, which use pointers to TFmode
8409 values as arguments instead of the TFmode registers themselves;
8410 that is why we cannot call emit_float_lib_cmp. */
8413 sparc_emit_float_lib_cmp (rtx x, rtx y, enum rtx_code comparison)
8415 const char *qpfunc;
8416 rtx slot0, slot1, result, tem, tem2, libfunc;
8417 machine_mode mode;
8418 enum rtx_code new_comparison;
8420 switch (comparison)
8422 case EQ:
8423 qpfunc = (TARGET_ARCH64 ? "_Qp_feq" : "_Q_feq");
8424 break;
8426 case NE:
8427 qpfunc = (TARGET_ARCH64 ? "_Qp_fne" : "_Q_fne");
8428 break;
8430 case GT:
8431 qpfunc = (TARGET_ARCH64 ? "_Qp_fgt" : "_Q_fgt");
8432 break;
8434 case GE:
8435 qpfunc = (TARGET_ARCH64 ? "_Qp_fge" : "_Q_fge");
8436 break;
8438 case LT:
8439 qpfunc = (TARGET_ARCH64 ? "_Qp_flt" : "_Q_flt");
8440 break;
8442 case LE:
8443 qpfunc = (TARGET_ARCH64 ? "_Qp_fle" : "_Q_fle");
8444 break;
8446 case ORDERED:
8447 case UNORDERED:
8448 case UNGT:
8449 case UNLT:
8450 case UNEQ:
8451 case UNGE:
8452 case UNLE:
8453 case LTGT:
8454 qpfunc = (TARGET_ARCH64 ? "_Qp_cmp" : "_Q_cmp");
8455 break;
8457 default:
8458 gcc_unreachable ();
8461 if (TARGET_ARCH64)
8463 if (MEM_P (x))
8465 tree expr = MEM_EXPR (x);
8466 if (expr)
8467 mark_addressable (expr);
8468 slot0 = x;
8470 else
8472 slot0 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8473 emit_move_insn (slot0, x);
8476 if (MEM_P (y))
8478 tree expr = MEM_EXPR (y);
8479 if (expr)
8480 mark_addressable (expr);
8481 slot1 = y;
8483 else
8485 slot1 = assign_stack_temp (TFmode, GET_MODE_SIZE(TFmode));
8486 emit_move_insn (slot1, y);
8489 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8490 emit_library_call (libfunc, LCT_NORMAL,
8491 DImode,
8492 XEXP (slot0, 0), Pmode,
8493 XEXP (slot1, 0), Pmode);
8494 mode = DImode;
8496 else
8498 libfunc = gen_rtx_SYMBOL_REF (Pmode, qpfunc);
8499 emit_library_call (libfunc, LCT_NORMAL,
8500 SImode,
8501 x, TFmode, y, TFmode);
8502 mode = SImode;
8506 /* Immediately move the result of the libcall into a pseudo
8507 register so reload doesn't clobber the value if it needs
8508 the return register for a spill reg. */
8509 result = gen_reg_rtx (mode);
8510 emit_move_insn (result, hard_libcall_value (mode, libfunc));
8512 switch (comparison)
8514 default:
8515 return gen_rtx_NE (VOIDmode, result, const0_rtx);
8516 case ORDERED:
8517 case UNORDERED:
8518 new_comparison = (comparison == UNORDERED ? EQ : NE);
8519 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, GEN_INT(3));
8520 case UNGT:
8521 case UNGE:
8522 new_comparison = (comparison == UNGT ? GT : NE);
8523 return gen_rtx_fmt_ee (new_comparison, VOIDmode, result, const1_rtx);
8524 case UNLE:
8525 return gen_rtx_NE (VOIDmode, result, const2_rtx);
8526 case UNLT:
8527 tem = gen_reg_rtx (mode);
8528 if (TARGET_ARCH32)
8529 emit_insn (gen_andsi3 (tem, result, const1_rtx));
8530 else
8531 emit_insn (gen_anddi3 (tem, result, const1_rtx));
8532 return gen_rtx_NE (VOIDmode, tem, const0_rtx);
8533 case UNEQ:
8534 case LTGT:
8535 tem = gen_reg_rtx (mode);
8536 if (TARGET_ARCH32)
8537 emit_insn (gen_addsi3 (tem, result, const1_rtx));
8538 else
8539 emit_insn (gen_adddi3 (tem, result, const1_rtx));
8540 tem2 = gen_reg_rtx (mode);
8541 if (TARGET_ARCH32)
8542 emit_insn (gen_andsi3 (tem2, tem, const2_rtx));
8543 else
8544 emit_insn (gen_anddi3 (tem2, tem, const2_rtx));
8545 new_comparison = (comparison == UNEQ ? EQ : NE);
8546 return gen_rtx_fmt_ee (new_comparison, VOIDmode, tem2, const0_rtx);
8549 gcc_unreachable ();
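/* Editor's worked example (not part of GCC): _Q_cmp/_Qp_cmp return
   0 (equal), 1 (less), 2 (greater) or 3 (unordered), while the
   dedicated predicates (_Q_feq etc.) return a plain boolean, hence the
   default "result != 0".  The decodings above follow directly:

     UNORDERED <=> result == 3	   ORDERED <=> result != 3
     UNGT	<=> result > 1	   UNGE    <=> result != 1
     UNLE	<=> result != 2	   UNLT    <=> (result & 1) != 0
     UNEQ	<=> ((result + 1) & 2) == 0
     LTGT	<=> ((result + 1) & 2) != 0  */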
8552 /* Generate an unsigned DImode to FP conversion. This is the same code
8553 optabs would emit if we didn't have TFmode patterns. */
8555 void
8556 sparc_emit_floatunsdi (rtx *operands, machine_mode mode)
8558 rtx i0, i1, f0, in, out;
8560 out = operands[0];
8561 in = force_reg (DImode, operands[1]);
8562 rtx_code_label *neglab = gen_label_rtx ();
8563 rtx_code_label *donelab = gen_label_rtx ();
8564 i0 = gen_reg_rtx (DImode);
8565 i1 = gen_reg_rtx (DImode);
8566 f0 = gen_reg_rtx (mode);
8568 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, DImode, 0, neglab);
8570 emit_insn (gen_rtx_SET (out, gen_rtx_FLOAT (mode, in)));
8571 emit_jump_insn (gen_jump (donelab));
8572 emit_barrier ();
8574 emit_label (neglab);
8576 emit_insn (gen_lshrdi3 (i0, in, const1_rtx));
8577 emit_insn (gen_anddi3 (i1, in, const1_rtx));
8578 emit_insn (gen_iordi3 (i0, i0, i1));
8579 emit_insn (gen_rtx_SET (f0, gen_rtx_FLOAT (mode, i0)));
8580 emit_insn (gen_rtx_SET (out, gen_rtx_PLUS (mode, f0, f0)));
8582 emit_label (donelab);
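/* Editor's worked example (not part of GCC): for inputs with the sign
   bit set, the code above halves the value with "round to odd",
   i0 = (in >> 1) | (in & 1), converts i0 and doubles the result.
   Keeping the discarded low bit or'ed in ensures the single rounding
   in f0 + f0 is the correctly rounded conversion of the full 64-bit
   unsigned value.  */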
8585 /* Generate an FP to unsigned DImode conversion. This is the same code
8586 optabs would emit if we didn't have TFmode patterns. */
8588 void
8589 sparc_emit_fixunsdi (rtx *operands, machine_mode mode)
8591 rtx i0, i1, f0, in, out, limit;
8593 out = operands[0];
8594 in = force_reg (mode, operands[1]);
8595 rtx_code_label *neglab = gen_label_rtx ();
8596 rtx_code_label *donelab = gen_label_rtx ();
8597 i0 = gen_reg_rtx (DImode);
8598 i1 = gen_reg_rtx (DImode);
8599 limit = gen_reg_rtx (mode);
8600 f0 = gen_reg_rtx (mode);
8602 emit_move_insn (limit,
8603 const_double_from_real_value (
8604 REAL_VALUE_ATOF ("9223372036854775808.0", mode), mode));
8605 emit_cmp_and_jump_insns (in, limit, GE, NULL_RTX, mode, 0, neglab);
8607 emit_insn (gen_rtx_SET (out,
8608 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, in))));
8609 emit_jump_insn (gen_jump (donelab));
8610 emit_barrier ();
8612 emit_label (neglab);
8614 emit_insn (gen_rtx_SET (f0, gen_rtx_MINUS (mode, in, limit)));
8615 emit_insn (gen_rtx_SET (i0,
8616 gen_rtx_FIX (DImode, gen_rtx_FIX (mode, f0))));
8617 emit_insn (gen_movdi (i1, const1_rtx));
8618 emit_insn (gen_ashldi3 (i1, i1, GEN_INT (63)));
8619 emit_insn (gen_xordi3 (out, i0, i1));
8621 emit_label (donelab);
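/* Editor's worked example (not part of GCC): values >= 2^63 do not fit
   a signed DImode fix, so the code above subtracts 2^63 (the "limit"
   constant 9223372036854775808.0), converts the difference, and xors
   bit 63 back in: for in == 2^63 + x (x exactly representable in MODE)
   the result is fix (x) with bit 63 set, the desired unsigned value.  */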
8624 /* Return the string to output a compare and branch instruction to DEST.
8625 DEST is the destination insn (i.e. the label), INSN is the source,
8626 and OP is the conditional expression. */
8628 const char *
8629 output_cbcond (rtx op, rtx dest, rtx_insn *insn)
8631 machine_mode mode = GET_MODE (XEXP (op, 0));
8632 enum rtx_code code = GET_CODE (op);
8633 const char *cond_str, *tmpl;
8634 int far, emit_nop, len;
8635 static char string[64];
8636 char size_char;
8638 /* Compare and Branch is limited to +-2KB. If it is too far away,
8639 change
8641 cxbne X, Y, .LC30
8643 to
8645 cxbe X, Y, .+16
8646 nop
8647 ba,pt xcc, .LC30
8648 nop */
8650 len = get_attr_length (insn);
8652 far = len == 4;
8653 emit_nop = len == 2;
8655 if (far)
8656 code = reverse_condition (code);
8658 size_char = ((mode == SImode) ? 'w' : 'x');
8660 switch (code)
8662 case NE:
8663 cond_str = "ne";
8664 break;
8666 case EQ:
8667 cond_str = "e";
8668 break;
8670 case GE:
8671 cond_str = "ge";
8672 break;
8674 case GT:
8675 cond_str = "g";
8676 break;
8678 case LE:
8679 cond_str = "le";
8680 break;
8682 case LT:
8683 cond_str = "l";
8684 break;
8686 case GEU:
8687 cond_str = "cc";
8688 break;
8690 case GTU:
8691 cond_str = "gu";
8692 break;
8694 case LEU:
8695 cond_str = "leu";
8696 break;
8698 case LTU:
8699 cond_str = "cs";
8700 break;
8702 default:
8703 gcc_unreachable ();
8706 if (far)
8708 int veryfar = 1, delta;
8710 if (INSN_ADDRESSES_SET_P ())
8712 delta = (INSN_ADDRESSES (INSN_UID (dest))
8713 - INSN_ADDRESSES (INSN_UID (insn)));
8714 /* Leave some instructions for "slop". */
8715 if (delta >= -260000 && delta < 260000)
8716 veryfar = 0;
8719 if (veryfar)
8720 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tb\t%%3\n\tnop";
8721 else
8722 tmpl = "c%cb%s\t%%1, %%2, .+16\n\tnop\n\tba,pt\t%%%%xcc, %%3\n\tnop";
8724 else
8726 if (emit_nop)
8727 tmpl = "c%cb%s\t%%1, %%2, %%3\n\tnop";
8728 else
8729 tmpl = "c%cb%s\t%%1, %%2, %%3";
8732 snprintf (string, sizeof(string), tmpl, size_char, cond_str);
8734 return string;
8737 /* Return the string to output a conditional branch to LABEL, testing
8738 register REG. LABEL is the operand number of the label; REG is the
8739 operand number of the reg. OP is the conditional expression. The mode
8740 of REG says what kind of comparison we made.
8742 DEST is the destination insn (i.e. the label), INSN is the source.
8744 REVERSED is nonzero if we should reverse the sense of the comparison.
8746 ANNUL is nonzero if we should generate an annulling branch. */
8748 const char *
8749 output_v9branch (rtx op, rtx dest, int reg, int label, int reversed,
8750 int annul, rtx_insn *insn)
8752 static char string[64];
8753 enum rtx_code code = GET_CODE (op);
8754 machine_mode mode = GET_MODE (XEXP (op, 0));
8755 rtx note;
8756 int far;
8757 char *p;
8759 /* Branch-on-register instructions are limited to +-128KB. If the target
8760 is too far away, change
8762 brnz,pt %g1, .LC30
8764 to
8766 brz,pn %g1, .+12
8767 nop
8768 ba,pt %xcc, .LC30
8770 and
8772 brgez,a,pn %o1, .LC29
8774 to
8776 brlz,pt %o1, .+16
8777 nop
8778 ba,pt %xcc, .LC29 */
8780 far = get_attr_length (insn) >= 3;
8782 /* If not floating-point or if EQ or NE, we can just reverse the code. */
8783 if (reversed ^ far)
8784 code = reverse_condition (code);
8786 /* Only 64-bit versions of these instructions exist. */
8787 gcc_assert (mode == DImode);
8789 /* Start by writing the branch condition. */
8791 switch (code)
8793 case NE:
8794 strcpy (string, "brnz");
8795 break;
8797 case EQ:
8798 strcpy (string, "brz");
8799 break;
8801 case GE:
8802 strcpy (string, "brgez");
8803 break;
8805 case LT:
8806 strcpy (string, "brlz");
8807 break;
8809 case LE:
8810 strcpy (string, "brlez");
8811 break;
8813 case GT:
8814 strcpy (string, "brgz");
8815 break;
8817 default:
8818 gcc_unreachable ();
8821 p = strchr (string, '\0');
8823 /* Now add the annulling, reg, label, and nop. */
8824 if (annul && ! far)
8826 strcpy (p, ",a");
8827 p += 2;
8830 if (insn && (note = find_reg_note (insn, REG_BR_PROB, NULL_RTX)))
8832 strcpy (p,
8833 ((profile_probability::from_reg_br_prob_note (XINT (note, 0))
8834 >= profile_probability::even ()) ^ far)
8835 ? ",pt" : ",pn");
8836 p += 3;
8839 *p = p < string + 8 ? '\t' : ' ';
8840 p++;
8841 *p++ = '%';
8842 *p++ = '0' + reg;
8843 *p++ = ',';
8844 *p++ = ' ';
8845 if (far)
8847 int veryfar = 1, delta;
8849 if (INSN_ADDRESSES_SET_P ())
8851 delta = (INSN_ADDRESSES (INSN_UID (dest))
8852 - INSN_ADDRESSES (INSN_UID (insn)));
8853 /* Leave some instructions for "slop". */
8854 if (delta >= -260000 && delta < 260000)
8855 veryfar = 0;
8858 strcpy (p, ".+12\n\t nop\n\t");
8859 /* Skip the next insn if requested or
8860 if we know that it will be a nop. */
8861 if (annul || ! final_sequence)
8862 p[3] = '6';
8863 p += 12;
8864 if (veryfar)
8866 strcpy (p, "b\t");
8867 p += 2;
8869 else
8871 strcpy (p, "ba,pt\t%%xcc, ");
8872 p += 13;
8875 *p++ = '%';
8876 *p++ = 'l';
8877 *p++ = '0' + label;
8878 *p++ = '%';
8879 *p++ = '#';
8880 *p = '\0';
8882 return string;
8885 /* Return 1 if any of the registers of the instruction are %l[0-7] or %o[0-7].
8886 Such instructions cannot be used in the delay slot of a return insn on v9.
8887 If TEST is 0, also rename all %i[0-7] registers to their %o[0-7] counterparts.
8890 static int
8891 epilogue_renumber (rtx *where, int test)
8893 const char *fmt;
8894 int i;
8895 enum rtx_code code;
8897 if (*where == 0)
8898 return 0;
8900 code = GET_CODE (*where);
8902 switch (code)
8904 case REG:
8905 if (REGNO (*where) >= 8 && REGNO (*where) < 24) /* oX or lX */
8906 return 1;
8907 if (! test && REGNO (*where) >= 24 && REGNO (*where) < 32)
8909 if (ORIGINAL_REGNO (*where))
8911 rtx n = gen_raw_REG (GET_MODE (*where),
8912 OUTGOING_REGNO (REGNO (*where)));
8913 ORIGINAL_REGNO (n) = ORIGINAL_REGNO (*where);
8914 *where = n;
8916 else
8917 *where = gen_rtx_REG (GET_MODE (*where),
8918 OUTGOING_REGNO (REGNO (*where)));
8920 return 0;
8922 case SCRATCH:
8923 case PC:
8924 case CONST_INT:
8925 case CONST_WIDE_INT:
8926 case CONST_DOUBLE:
8927 return 0;
8929 /* Do not replace the frame pointer with the stack pointer because
8930 it can cause the delayed instruction to load below the stack.
8931 This occurs when instructions like:
8933 (set (reg/i:SI 24 %i0)
8934 (mem/f:SI (plus:SI (reg/f:SI 30 %fp)
8935 (const_int -20 [0xffffffec])) 0))
8937 are in the return delayed slot. */
8938 case PLUS:
8939 if (GET_CODE (XEXP (*where, 0)) == REG
8940 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM
8941 && (GET_CODE (XEXP (*where, 1)) != CONST_INT
8942 || INTVAL (XEXP (*where, 1)) < SPARC_STACK_BIAS))
8943 return 1;
8944 break;
8946 case MEM:
8947 if (SPARC_STACK_BIAS
8948 && GET_CODE (XEXP (*where, 0)) == REG
8949 && REGNO (XEXP (*where, 0)) == HARD_FRAME_POINTER_REGNUM)
8950 return 1;
8951 break;
8953 default:
8954 break;
8957 fmt = GET_RTX_FORMAT (code);
8959 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8961 if (fmt[i] == 'E')
8963 int j;
8964 for (j = XVECLEN (*where, i) - 1; j >= 0; j--)
8965 if (epilogue_renumber (&(XVECEXP (*where, i, j)), test))
8966 return 1;
8968 else if (fmt[i] == 'e'
8969 && epilogue_renumber (&(XEXP (*where, i)), test))
8970 return 1;
8972 return 0;
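/* Editor's illustration (not part of GCC): once the register window is
   popped by a return, the function's %i registers become the caller's
   %o registers; with TEST == 0 the walk above therefore rewrites e.g.
   %i0 (reg 24) as %o0 (reg 8) via OUTGOING_REGNO, and any insn that
   still mentions %l or %o registers (regs 8-23), which do not survive
   the window shift, is rejected by returning 1.  */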
8975 /* Leaf functions and non-leaf functions have different needs. */
8977 static const int reg_leaf_alloc_order[] = REG_LEAF_ALLOC_ORDER;
8979 static const int reg_nonleaf_alloc_order[] = REG_ALLOC_ORDER;
8981 static const int *const reg_alloc_orders[] =
8983 reg_leaf_alloc_order,
8984 reg_nonleaf_alloc_order
8987 void
8988 sparc_order_regs_for_local_alloc (void)
8990 static int last_order_nonleaf = 1;
8992 if (df_regs_ever_live_p (15) != last_order_nonleaf)
8994 last_order_nonleaf = !last_order_nonleaf;
8995 memcpy ((char *) reg_alloc_order,
8996 (const char *) reg_alloc_orders[last_order_nonleaf],
8997 FIRST_PSEUDO_REGISTER * sizeof (int));
9002 sparc_leaf_reg_remap (int regno)
9004 gcc_checking_assert (regno >= 0);
9006 /* Do not remap in flat mode. */
9007 if (TARGET_FLAT)
9008 return regno;
9010 /* Do not remap global, stack pointer or floating-point registers. */
9011 if (regno < 8 || regno == STACK_POINTER_REGNUM || regno > SPARC_LAST_INT_REG)
9012 return regno;
9014 /* Neither out nor local nor frame pointer registers must appear. */
9015 if ((regno >= 8 && regno <= 23) || regno == HARD_FRAME_POINTER_REGNUM)
9016 return -1;
9018 /* Remap in to out registers. */
9019 return regno - 16;
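/* Editor's worked example (not part of GCC): in a leaf function the
   window is never shifted, so the %i registers are physically the
   caller's %o registers.  Hence sparc_leaf_reg_remap (24) == 8 maps
   %i0 to %o0; globals, %sp and the FP registers map to themselves; and
   %o/%l registers and the frame pointer %i6 yield -1 because a leaf
   function must not use them.  */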
9022 /* Return 1 if REG and MEM are legitimate enough to allow the various
9023 MEM<-->REG splits to be run. */
9026 sparc_split_reg_mem_legitimate (rtx reg, rtx mem)
9028 /* Punt if we are here by mistake. */
9029 gcc_assert (reload_completed);
9031 /* We must have an offsettable memory reference. */
9032 if (!offsettable_memref_p (mem))
9033 return 0;
9035 /* If we have legitimate args for ldd/std, we do not want
9036 the split to happen. */
9037 if ((REGNO (reg) % 2) == 0 && mem_min_alignment (mem, 8))
9038 return 0;
9040 /* Success. */
9041 return 1;
9044 /* Split a REG <-- MEM move into a pair of moves in MODE. */
9046 void
9047 sparc_split_reg_mem (rtx dest, rtx src, machine_mode mode)
9049 rtx high_part = gen_highpart (mode, dest);
9050 rtx low_part = gen_lowpart (mode, dest);
9051 rtx word0 = adjust_address (src, mode, 0);
9052 rtx word1 = adjust_address (src, mode, 4);
9054 if (reg_overlap_mentioned_p (high_part, word1))
9056 emit_move_insn_1 (low_part, word1);
9057 emit_move_insn_1 (high_part, word0);
9059 else
9061 emit_move_insn_1 (high_part, word0);
9062 emit_move_insn_1 (low_part, word1);
9066 /* Split a MEM <-- REG move into a pair of moves in MODE. */
9068 void
9069 sparc_split_mem_reg (rtx dest, rtx src, machine_mode mode)
9071 rtx word0 = adjust_address (dest, mode, 0);
9072 rtx word1 = adjust_address (dest, mode, 4);
9073 rtx high_part = gen_highpart (mode, src);
9074 rtx low_part = gen_lowpart (mode, src);
9076 emit_move_insn_1 (word0, high_part);
9077 emit_move_insn_1 (word1, low_part);
9080 /* Like sparc_split_reg_mem_legitimate but for REG <--> REG moves. */
9083 sparc_split_reg_reg_legitimate (rtx reg1, rtx reg2)
9085 /* Punt if we are here by mistake. */
9086 gcc_assert (reload_completed);
9088 if (GET_CODE (reg1) == SUBREG)
9089 reg1 = SUBREG_REG (reg1);
9090 if (GET_CODE (reg1) != REG)
9091 return 0;
9092 const int regno1 = REGNO (reg1);
9094 if (GET_CODE (reg2) == SUBREG)
9095 reg2 = SUBREG_REG (reg2);
9096 if (GET_CODE (reg2) != REG)
9097 return 0;
9098 const int regno2 = REGNO (reg2);
9100 if (SPARC_INT_REG_P (regno1) && SPARC_INT_REG_P (regno2))
9101 return 1;
9103 if (TARGET_VIS3)
9105 if ((SPARC_INT_REG_P (regno1) && SPARC_FP_REG_P (regno2))
9106 || (SPARC_FP_REG_P (regno1) && SPARC_INT_REG_P (regno2)))
9107 return 1;
9110 return 0;
9113 /* Split a REG <--> REG move into a pair of moves in MODE. */
9115 void
9116 sparc_split_reg_reg (rtx dest, rtx src, machine_mode mode)
9118 rtx dest1 = gen_highpart (mode, dest);
9119 rtx dest2 = gen_lowpart (mode, dest);
9120 rtx src1 = gen_highpart (mode, src);
9121 rtx src2 = gen_lowpart (mode, src);
9123 /* Now emit using the real source and destination we found, swapping
9124 the order if we detect overlap. */
9125 if (reg_overlap_mentioned_p (dest1, src2))
9127 emit_move_insn_1 (dest2, src2);
9128 emit_move_insn_1 (dest1, src1);
9130 else
9132 emit_move_insn_1 (dest1, src1);
9133 emit_move_insn_1 (dest2, src2);
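/* Editor's illustration (not part of GCC): the overlap test above
   matters for a move such as (%g3,%g4) <- (%g2,%g3): copying the high
   part first would clobber %g3 before its old value reaches %g4, so
   the low part is emitted first in that case.  */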
9137 /* Return 1 if REGNO (reg1) is even and REGNO (reg1) == REGNO (reg2) - 1.
9138 This makes them candidates for using ldd and std insns.
9140 Note reg1 and reg2 *must* be hard registers. */
9143 registers_ok_for_ldd_peep (rtx reg1, rtx reg2)
9145 /* We might have been passed a SUBREG. */
9146 if (GET_CODE (reg1) != REG || GET_CODE (reg2) != REG)
9147 return 0;
9149 if (REGNO (reg1) % 2 != 0)
9150 return 0;
9152 /* Integer ldd is deprecated in SPARC V9 */
9153 if (TARGET_V9 && SPARC_INT_REG_P (REGNO (reg1)))
9154 return 0;
9156 return (REGNO (reg1) == REGNO (reg2) - 1);
9159 /* Return 1 if the addresses in mem1 and mem2 are suitable for use in
9160 an ldd or std insn.
9162 This can only happen when addr1 and addr2, the addresses in mem1
9163 and mem2, are consecutive memory locations (addr1 + 4 == addr2).
9164 addr1 must also be aligned on a 64-bit boundary.
9166 Also, if dependent_reg_rtx is not null, it should not be used to
9167 compute the address for mem1, i.e. we cannot optimize a sequence
9168 like:
9169 ld [%o0], %o0
9170 ld [%o0 + 4], %o1
9171 to
9172 ldd [%o0], %o0
9173 nor:
9174 ld [%g3 + 4], %g3
9175 ld [%g3], %g2
9176 to
9177 ldd [%g3], %g2
9179 But, note that the transformation from:
9180 ld [%g2 + 4], %g3
9181 ld [%g2], %g2
9182 to
9183 ldd [%g2], %g2
9184 is perfectly fine. Thus, the peephole2 patterns always pass us
9185 the destination register of the first load, never the second one.
9187 For stores we don't have a similar problem, so dependent_reg_rtx is
9188 NULL_RTX. */
9191 mems_ok_for_ldd_peep (rtx mem1, rtx mem2, rtx dependent_reg_rtx)
9193 rtx addr1, addr2;
9194 unsigned int reg1;
9195 HOST_WIDE_INT offset1;
9197 /* The mems cannot be volatile. */
9198 if (MEM_VOLATILE_P (mem1) || MEM_VOLATILE_P (mem2))
9199 return 0;
9201 /* MEM1 should be aligned on a 64-bit boundary. */
9202 if (MEM_ALIGN (mem1) < 64)
9203 return 0;
9205 addr1 = XEXP (mem1, 0);
9206 addr2 = XEXP (mem2, 0);
9208 /* Extract a register number and offset (if used) from the first addr. */
9209 if (GET_CODE (addr1) == PLUS)
9211 /* If not a REG, return zero. */
9212 if (GET_CODE (XEXP (addr1, 0)) != REG)
9213 return 0;
9214 else
9216 reg1 = REGNO (XEXP (addr1, 0));
9217 /* The offset must be constant! */
9218 if (GET_CODE (XEXP (addr1, 1)) != CONST_INT)
9219 return 0;
9220 offset1 = INTVAL (XEXP (addr1, 1));
9223 else if (GET_CODE (addr1) != REG)
9224 return 0;
9225 else
9227 reg1 = REGNO (addr1);
9228 /* This was a simple (mem (reg)) expression. Offset is 0. */
9229 offset1 = 0;
9232 /* Make sure the second address is a (mem (plus (reg) (const_int))). */
9233 if (GET_CODE (addr2) != PLUS)
9234 return 0;
9236 if (GET_CODE (XEXP (addr2, 0)) != REG
9237 || GET_CODE (XEXP (addr2, 1)) != CONST_INT)
9238 return 0;
9240 if (reg1 != REGNO (XEXP (addr2, 0)))
9241 return 0;
9243 if (dependent_reg_rtx != NULL_RTX && reg1 == REGNO (dependent_reg_rtx))
9244 return 0;
9246 /* The first offset must be evenly divisible by 8 to ensure the
9247 address is 64-bit aligned. */
9248 if (offset1 % 8 != 0)
9249 return 0;
9251 /* The offset for the second addr must be 4 more than the first addr. */
9252 if (INTVAL (XEXP (addr2, 1)) != offset1 + 4)
9253 return 0;
9255 /* All the tests passed. addr1 and addr2 are valid for ldd and std
9256 instructions. */
9257 return 1;
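/* Editor's illustration (not part of GCC): the checks above accept a
   pair such as

     ld	[%g1], %g2	! 8-byte aligned, offset 0
     ld	[%g1+4], %g3	! same base, offset 0 + 4

   which the peephole fuses into "ldd [%g1], %g2", but reject the
   dependent form "ld [%o0], %o0; ld [%o0+4], %o1" because the first
   load overwrites the base register still needed by the second.  */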
9260 /* Return the widened memory access made of MEM1 and MEM2 in MODE. */
9263 widen_mem_for_ldd_peep (rtx mem1, rtx mem2, machine_mode mode)
9265 rtx x = widen_memory_access (mem1, mode, 0);
9266 MEM_NOTRAP_P (x) = MEM_NOTRAP_P (mem1) && MEM_NOTRAP_P (mem2);
9267 return x;
9270 /* Return 1 if reg is a pseudo, or is the first register in
9271 a hard register pair. This makes it suitable for use in
9272 ldd and std insns. */
9275 register_ok_for_ldd (rtx reg)
9277 /* We might have been passed a SUBREG. */
9278 if (!REG_P (reg))
9279 return 0;
9281 if (REGNO (reg) < FIRST_PSEUDO_REGISTER)
9282 return (REGNO (reg) % 2 == 0);
9284 return 1;
9287 /* Return 1 if OP, a MEM, has an address which is known to be
9288 aligned to an 8-byte boundary. */
9291 memory_ok_for_ldd (rtx op)
9293 if (!mem_min_alignment (op, 8))
9294 return 0;
9296 /* We need to perform the job of a memory constraint. */
9297 if ((reload_in_progress || reload_completed)
9298 && !strict_memory_address_p (Pmode, XEXP (op, 0)))
9299 return 0;
9301 if (lra_in_progress && !memory_address_p (Pmode, XEXP (op, 0)))
9302 return 0;
9304 return 1;
9307 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
9309 static bool
9310 sparc_print_operand_punct_valid_p (unsigned char code)
9312 if (code == '#'
9313 || code == '*'
9314 || code == '('
9315 || code == ')'
9316 || code == '_'
9317 || code == '&')
9318 return true;
9320 return false;
9323 /* Implement TARGET_PRINT_OPERAND.
9324 Print operand X (an rtx) in assembler syntax to file FILE.
9325 CODE is a letter or dot (`z' in `%z0') or 0 if no letter was specified.
9326 For `%' followed by punctuation, CODE is the punctuation and X is null. */
9328 static void
9329 sparc_print_operand (FILE *file, rtx x, int code)
9331 const char *s;
9333 switch (code)
9335 case '#':
9336 /* Output an insn in a delay slot. */
9337 if (final_sequence)
9338 sparc_indent_opcode = 1;
9339 else
9340 fputs ("\n\t nop", file);
9341 return;
9342 case '*':
9343 /* Output an annul flag if there's nothing for the delay slot and we
9344 are optimizing. This is always used with '(' below.
9345 Sun OS 4.1.1 dbx can't handle an annulled unconditional branch;
9346 this is a dbx bug. So, we only do this when optimizing.
9347 On UltraSPARC, a branch in a delay slot causes a pipeline flush.
9348 Always emit a nop in case the next instruction is a branch. */
9349 if (! final_sequence && (optimize && (int)sparc_cpu < PROCESSOR_V9))
9350 fputs (",a", file);
9351 return;
9352 case '(':
9353 /* Output a 'nop' if there's nothing for the delay slot and we are
9354 not optimizing. This is always used with '*' above. */
9355 if (! final_sequence && ! (optimize && (int)sparc_cpu < PROCESSOR_V9))
9356 fputs ("\n\t nop", file);
9357 else if (final_sequence)
9358 sparc_indent_opcode = 1;
9359 return;
9360 case ')':
9361 /* Output the right displacement from the saved PC on function return.
9362 The caller may have placed an "unimp" insn immediately after the call
9363 so we have to account for it. This insn is used in the 32-bit ABI
9364 when calling a function that returns a non zero-sized structure. The
9365 64-bit ABI doesn't have it. Be careful to have this test be the same
9366 as that for the call. The exception is when sparc_std_struct_return
9367 is enabled, the psABI is followed exactly and the adjustment is made
9368 by the code in sparc_struct_value_rtx. The call emitted is the same
9369 when sparc_std_struct_return is enabled. */
9370 if (!TARGET_ARCH64
9371 && cfun->returns_struct
9372 && !sparc_std_struct_return
9373 && DECL_SIZE (DECL_RESULT (current_function_decl))
9374 && TREE_CODE (DECL_SIZE (DECL_RESULT (current_function_decl)))
9375 == INTEGER_CST
9376 && !integer_zerop (DECL_SIZE (DECL_RESULT (current_function_decl))))
9377 fputs ("12", file);
9378 else
9379 fputc ('8', file);
9380 return;
9381 case '_':
9382 /* Output the Embedded Medium/Anywhere code model base register. */
9383 fputs (EMBMEDANY_BASE_REG, file);
9384 return;
9385 case '&':
9386 /* Print some local dynamic TLS name. */
9387 if (const char *name = get_some_local_dynamic_name ())
9388 assemble_name (file, name);
9389 else
9390 output_operand_lossage ("'%%&' used without any "
9391 "local dynamic TLS references");
9392 return;
9394 case 'Y':
9395 /* Adjust the operand to take into account a RESTORE operation. */
9396 if (GET_CODE (x) == CONST_INT)
9397 break;
9398 else if (GET_CODE (x) != REG)
9399 output_operand_lossage ("invalid %%Y operand");
9400 else if (REGNO (x) < 8)
9401 fputs (reg_names[REGNO (x)], file);
9402 else if (REGNO (x) >= 24 && REGNO (x) < 32)
9403 fputs (reg_names[REGNO (x)-16], file);
9404 else
9405 output_operand_lossage ("invalid %%Y operand");
9406 return;
9407 case 'L':
9408 /* Print out the low order register name of a register pair. */
9409 if (WORDS_BIG_ENDIAN)
9410 fputs (reg_names[REGNO (x)+1], file);
9411 else
9412 fputs (reg_names[REGNO (x)], file);
9413 return;
9414 case 'H':
9415 /* Print out the high order register name of a register pair. */
9416 if (WORDS_BIG_ENDIAN)
9417 fputs (reg_names[REGNO (x)], file);
9418 else
9419 fputs (reg_names[REGNO (x)+1], file);
9420 return;
9421 case 'R':
9422 /* Print out the second register name of a register pair or quad.
9423 I.e., R (%o0) => %o1. */
9424 fputs (reg_names[REGNO (x)+1], file);
9425 return;
9426 case 'S':
9427 /* Print out the third register name of a register quad.
9428 I.e., S (%o0) => %o2. */
9429 fputs (reg_names[REGNO (x)+2], file);
9430 return;
9431 case 'T':
9432 /* Print out the fourth register name of a register quad.
9433 I.e., T (%o0) => %o3. */
9434 fputs (reg_names[REGNO (x)+3], file);
9435 return;
9436 case 'x':
9437 /* Print a condition code register. */
9438 if (REGNO (x) == SPARC_ICC_REG)
9440 switch (GET_MODE (x))
9442 case E_CCmode:
9443 case E_CCNZmode:
9444 case E_CCCmode:
9445 case E_CCVmode:
9446 s = "%icc";
9447 break;
9448 case E_CCXmode:
9449 case E_CCXNZmode:
9450 case E_CCXCmode:
9451 case E_CCXVmode:
9452 s = "%xcc";
9453 break;
9454 default:
9455 gcc_unreachable ();
9457 fputs (s, file);
9459 else
9460 /* %fccN register */
9461 fputs (reg_names[REGNO (x)], file);
9462 return;
9463 case 'm':
9464 /* Print the operand's address only. */
9465 output_address (GET_MODE (x), XEXP (x, 0));
9466 return;
9467 case 'r':
9468 /* In this case we need a register. Use %g0 if the
9469 operand is const0_rtx. */
9470 if (x == const0_rtx
9471 || (GET_MODE (x) != VOIDmode && x == CONST0_RTX (GET_MODE (x))))
9473 fputs ("%g0", file);
9474 return;
9476 else
9477 break;
9479 case 'A':
9480 switch (GET_CODE (x))
9482 case IOR:
9483 s = "or";
9484 break;
9485 case AND:
9486 s = "and";
9487 break;
9488 case XOR:
9489 s = "xor";
9490 break;
9491 default:
9492 output_operand_lossage ("invalid %%A operand");
9493 s = "";
9494 break;
9496 fputs (s, file);
9497 return;
9499 case 'B':
9500 switch (GET_CODE (x))
9502 case IOR:
9503 s = "orn";
9504 break;
9505 case AND:
9506 s = "andn";
9507 break;
9508 case XOR:
9509 s = "xnor";
9510 break;
9511 default:
9512 output_operand_lossage ("invalid %%B operand");
9513 s = "";
9514 break;
9516 fputs (s, file);
9517 return;
9519 /* This is used by the conditional move instructions. */
9520 case 'C':
9522 machine_mode mode = GET_MODE (XEXP (x, 0));
9523 switch (GET_CODE (x))
9525 case NE:
9526 if (mode == CCVmode || mode == CCXVmode)
9527 s = "vs";
9528 else
9529 s = "ne";
9530 break;
9531 case EQ:
9532 if (mode == CCVmode || mode == CCXVmode)
9533 s = "vc";
9534 else
9535 s = "e";
9536 break;
9537 case GE:
9538 if (mode == CCNZmode || mode == CCXNZmode)
9539 s = "pos";
9540 else
9541 s = "ge";
9542 break;
9543 case GT:
9544 s = "g";
9545 break;
9546 case LE:
9547 s = "le";
9548 break;
9549 case LT:
9550 if (mode == CCNZmode || mode == CCXNZmode)
9551 s = "neg";
9552 else
9553 s = "l";
9554 break;
9555 case GEU:
9556 s = "geu";
9557 break;
9558 case GTU:
9559 s = "gu";
9560 break;
9561 case LEU:
9562 s = "leu";
9563 break;
9564 case LTU:
9565 s = "lu";
9566 break;
9567 case LTGT:
9568 s = "lg";
9569 break;
9570 case UNORDERED:
9571 s = "u";
9572 break;
9573 case ORDERED:
9574 s = "o";
9575 break;
9576 case UNLT:
9577 s = "ul";
9578 break;
9579 case UNLE:
9580 s = "ule";
9581 break;
9582 case UNGT:
9583 s = "ug";
9584 break;
9585 case UNGE:
9586 s = "uge";
9587 break;
9588 case UNEQ:
9589 s = "ue";
9590 break;
9591 default:
9592 output_operand_lossage ("invalid %%C operand");
9593 s = "";
9594 break;
9596 fputs (s, file);
9597 return;
9600 /* This are used by the movr instruction pattern. */
9601 case 'D':
9603 switch (GET_CODE (x))
9605 case NE:
9606 s = "ne";
9607 break;
9608 case EQ:
9609 s = "e";
9610 break;
9611 case GE:
9612 s = "gez";
9613 break;
9614 case LT:
9615 s = "lz";
9616 break;
9617 case LE:
9618 s = "lez";
9619 break;
9620 case GT:
9621 s = "gz";
9622 break;
9623 default:
9624 output_operand_lossage ("invalid %%D operand");
9625 s = "";
9626 break;
9628 fputs (s, file);
9629 return;
9632 case 'b':
9634 /* Print a sign-extended character. */
9635 int i = trunc_int_for_mode (INTVAL (x), QImode);
9636 fprintf (file, "%d", i);
9637 return;
9640 case 'f':
9641 /* Operand must be a MEM; write its address. */
9642 if (GET_CODE (x) != MEM)
9643 output_operand_lossage ("invalid %%f operand");
9644 output_address (GET_MODE (x), XEXP (x, 0));
9645 return;
9647 case 's':
9649 /* Print a sign-extended 32-bit value. */
9650 HOST_WIDE_INT i;
9651 if (GET_CODE(x) == CONST_INT)
9652 i = INTVAL (x);
9653 else
9655 output_operand_lossage ("invalid %%s operand");
9656 return;
9658 i = trunc_int_for_mode (i, SImode);
9659 fprintf (file, HOST_WIDE_INT_PRINT_DEC, i);
9660 return;
9663 case 0:
9664 /* Do nothing special. */
9665 break;
9667 default:
9668 /* Undocumented flag. */
9669 output_operand_lossage ("invalid operand output code");
9672 if (GET_CODE (x) == REG)
9673 fputs (reg_names[REGNO (x)], file);
9674 else if (GET_CODE (x) == MEM)
9676 fputc ('[', file);
9677 /* Poor Sun assembler doesn't understand absolute addressing. */
9678 if (CONSTANT_P (XEXP (x, 0)))
9679 fputs ("%g0+", file);
9680 output_address (GET_MODE (x), XEXP (x, 0));
9681 fputc (']', file);
9683 else if (GET_CODE (x) == HIGH)
9685 fputs ("%hi(", file);
9686 output_addr_const (file, XEXP (x, 0));
9687 fputc (')', file);
9689 else if (GET_CODE (x) == LO_SUM)
9691 sparc_print_operand (file, XEXP (x, 0), 0);
9692 if (TARGET_CM_MEDMID)
9693 fputs ("+%l44(", file);
9694 else
9695 fputs ("+%lo(", file);
9696 output_addr_const (file, XEXP (x, 1));
9697 fputc (')', file);
9699 else if (GET_CODE (x) == CONST_DOUBLE)
9700 output_operand_lossage ("floating-point constant not a valid immediate operand");
9701 else
9702 output_addr_const (file, x);
9705 /* Implement TARGET_PRINT_OPERAND_ADDRESS. */
9707 static void
9708 sparc_print_operand_address (FILE *file, machine_mode /*mode*/, rtx x)
9710 rtx base, index = 0;
9711 int offset = 0;
9712 rtx addr = x;
9714 if (REG_P (addr))
9715 fputs (reg_names[REGNO (addr)], file);
9716 else if (GET_CODE (addr) == PLUS)
9718 if (CONST_INT_P (XEXP (addr, 0)))
9719 offset = INTVAL (XEXP (addr, 0)), base = XEXP (addr, 1);
9720 else if (CONST_INT_P (XEXP (addr, 1)))
9721 offset = INTVAL (XEXP (addr, 1)), base = XEXP (addr, 0);
9722 else
9723 base = XEXP (addr, 0), index = XEXP (addr, 1);
9724 if (GET_CODE (base) == LO_SUM)
9726 gcc_assert (USE_AS_OFFSETABLE_LO10
9727 && TARGET_ARCH64
9728 && ! TARGET_CM_MEDMID);
9729 output_operand (XEXP (base, 0), 0);
9730 fputs ("+%lo(", file);
9731 output_address (VOIDmode, XEXP (base, 1));
9732 fprintf (file, ")+%d", offset);
9734 else
9736 fputs (reg_names[REGNO (base)], file);
9737 if (index == 0)
9738 fprintf (file, "%+d", offset);
9739 else if (REG_P (index))
9740 fprintf (file, "+%s", reg_names[REGNO (index)]);
9741 else if (GET_CODE (index) == SYMBOL_REF
9742 || GET_CODE (index) == LABEL_REF
9743 || GET_CODE (index) == CONST)
9744 fputc ('+', file), output_addr_const (file, index);
9745 else gcc_unreachable ();
9748 else if (GET_CODE (addr) == MINUS
9749 && GET_CODE (XEXP (addr, 1)) == LABEL_REF)
9751 output_addr_const (file, XEXP (addr, 0));
9752 fputs ("-(", file);
9753 output_addr_const (file, XEXP (addr, 1));
9754 fputs ("-.)", file);
9756 else if (GET_CODE (addr) == LO_SUM)
9758 output_operand (XEXP (addr, 0), 0);
9759 if (TARGET_CM_MEDMID)
9760 fputs ("+%l44(", file);
9761 else
9762 fputs ("+%lo(", file);
9763 output_address (VOIDmode, XEXP (addr, 1));
9764 fputc (')', file);
9766 else if (flag_pic
9767 && GET_CODE (addr) == CONST
9768 && GET_CODE (XEXP (addr, 0)) == MINUS
9769 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST
9770 && GET_CODE (XEXP (XEXP (XEXP (addr, 0), 1), 0)) == MINUS
9771 && XEXP (XEXP (XEXP (XEXP (addr, 0), 1), 0), 1) == pc_rtx)
9773 addr = XEXP (addr, 0);
9774 output_addr_const (file, XEXP (addr, 0));
9775 /* Group the args of the second CONST in parenthesis. */
9776 fputs ("-(", file);
9777 /* Skip past the second CONST--it does nothing for us. */
9778 output_addr_const (file, XEXP (XEXP (addr, 1), 0));
9779 /* Close the parenthesis. */
9780 fputc (')', file);
9782 else
9784 output_addr_const (file, addr);
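/* Editor's illustration (not part of GCC): typical outputs of the
   routine above are "%g1+44" for (plus (reg) (const_int)), "%o0+%o1"
   for a reg+reg address, and "%g1+%lo(var)" for a LO_SUM (with %l44
   instead of %lo under -mcmodel=medmid).  */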
9788 /* Target hook for assembling integer objects. The sparc version has
9789 special handling for aligned DI-mode objects. */
9791 static bool
9792 sparc_assemble_integer (rtx x, unsigned int size, int aligned_p)
9794 /* ??? We only output .xword's for symbols and only then in environments
9795 where the assembler can handle them. */
9796 if (aligned_p && size == 8 && GET_CODE (x) != CONST_INT)
9798 if (TARGET_V9)
9800 assemble_integer_with_op ("\t.xword\t", x);
9801 return true;
9803 else
9805 assemble_aligned_integer (4, const0_rtx);
9806 assemble_aligned_integer (4, x);
9807 return true;
9810 return default_assemble_integer (x, size, aligned_p);
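/* Editor's illustration (not part of GCC): an aligned 8-byte symbolic
   value is emitted as "\t.xword\tsym" on V9 assemblers, and as the
   big-endian pair "\t.word\t0" / "\t.word\tsym" otherwise; CONST_INTs
   and unaligned data fall through to default_assemble_integer.  */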
9813 /* Return the value of a code used in the .proc pseudo-op that says
9814 what kind of result this function returns. For non-C types, we pick
9815 the closest C type. */
9817 #ifndef SHORT_TYPE_SIZE
9818 #define SHORT_TYPE_SIZE (BITS_PER_UNIT * 2)
9819 #endif
9821 #ifndef INT_TYPE_SIZE
9822 #define INT_TYPE_SIZE BITS_PER_WORD
9823 #endif
9825 #ifndef LONG_TYPE_SIZE
9826 #define LONG_TYPE_SIZE BITS_PER_WORD
9827 #endif
9829 #ifndef LONG_LONG_TYPE_SIZE
9830 #define LONG_LONG_TYPE_SIZE (BITS_PER_WORD * 2)
9831 #endif
9833 unsigned long
9834 sparc_type_code (tree type)
9836 unsigned long qualifiers = 0;
9837 unsigned shift;
9839 /* Only the first 30 bits of the qualifier are valid. We must refrain from
9840 setting more, since some assemblers will give an error for this. Also,
9841 we must be careful to avoid shifts of 32 bits or more to avoid getting
9842 unpredictable results. */
9844 for (shift = 6; shift < 30; shift += 2, type = TREE_TYPE (type))
9846 switch (TREE_CODE (type))
9848 case ERROR_MARK:
9849 return qualifiers;
9851 case ARRAY_TYPE:
9852 qualifiers |= (3 << shift);
9853 break;
9855 case FUNCTION_TYPE:
9856 case METHOD_TYPE:
9857 qualifiers |= (2 << shift);
9858 break;
9860 case POINTER_TYPE:
9861 case REFERENCE_TYPE:
9862 case OFFSET_TYPE:
9863 qualifiers |= (1 << shift);
9864 break;
9866 case RECORD_TYPE:
9867 return (qualifiers | 8);
9869 case UNION_TYPE:
9870 case QUAL_UNION_TYPE:
9871 return (qualifiers | 9);
9873 case ENUMERAL_TYPE:
9874 return (qualifiers | 10);
9876 case VOID_TYPE:
9877 return (qualifiers | 16);
9879 case INTEGER_TYPE:
9880 /* If this is a range type, consider it to be the underlying
9881 type. */
9882 if (TREE_TYPE (type) != 0)
9883 break;
9885 /* Carefully distinguish all the standard types of C,
9886 without messing up if the language is not C. We do this by
9887 testing TYPE_PRECISION and TYPE_UNSIGNED. The old code used to
9888 look at both the names and the above fields, but that's redundant.
9889 Any type whose size is between two C types will be considered
9890 to be the wider of the two types. Also, we do not have a
9891 special code to use for "long long", so anything wider than
9892 long is treated the same. Note that we can't distinguish
9893 between "int" and "long" in this code if they are the same
9894 size, but that's fine, since neither can the assembler. */
9896 if (TYPE_PRECISION (type) <= CHAR_TYPE_SIZE)
9897 return (qualifiers | (TYPE_UNSIGNED (type) ? 12 : 2));
9899 else if (TYPE_PRECISION (type) <= SHORT_TYPE_SIZE)
9900 return (qualifiers | (TYPE_UNSIGNED (type) ? 13 : 3));
9902 else if (TYPE_PRECISION (type) <= INT_TYPE_SIZE)
9903 return (qualifiers | (TYPE_UNSIGNED (type) ? 14 : 4));
9905 else
9906 return (qualifiers | (TYPE_UNSIGNED (type) ? 15 : 5));
9908 case REAL_TYPE:
9909 /* If this is a range type, consider it to be the underlying
9910 type. */
9911 if (TREE_TYPE (type) != 0)
9912 break;
9914 /* Carefully distinguish all the standard types of C,
9915 without messing up if the language is not C. */
9917 if (TYPE_PRECISION (type) == TYPE_PRECISION (float_type_node))
9918 return (qualifiers | 6);
9920 else
9921 return (qualifiers | 7);
9923 case COMPLEX_TYPE: /* GNU Fortran COMPLEX type. */
9924 /* ??? We need to distinguish between double and float complex types,
9925 but I don't know how yet because I can't reach this code from
9926 existing front-ends. */
9927 return (qualifiers | 7); /* Who knows? */
9929 case VECTOR_TYPE:
9930 case BOOLEAN_TYPE: /* Boolean truth value type. */
9931 case LANG_TYPE:
9932 case NULLPTR_TYPE:
9933 return qualifiers;
9935 default:
9936 gcc_unreachable (); /* Not a type! */
9940 return qualifiers;
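/* Worked example of the encoding above: for a function returning
   `unsigned short **', the loop starts at shift 6 and peels two
   POINTER_TYPEs, accumulating (1 << 6) | (1 << 8), then reaches the
   INTEGER_TYPE, whose precision matches SHORT_TYPE_SIZE and which is
   unsigned, so the final code is

	(1 << 6) | (1 << 8) | 13  ==  0x14d

   well within the 30 bits accepted by the .proc pseudo-op.  */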
9943 /* Nested function support. */
9945 /* Emit RTL insns to initialize the variable parts of a trampoline.
9946 FNADDR is an RTX for the address of the function's pure code.
9947 CXT is an RTX for the static chain value for the function.
9949 This takes 16 insns: 2 shifts & 2 ands (to split up addresses), 4 sethi
9950 (to load in opcodes), 4 iors (to merge address and opcodes), and 4 writes
9951 (to store insns). This is a bit excessive. Perhaps a different
9952 mechanism would be better here.
9954 Emit enough FLUSH insns to synchronize the data and instruction caches. */
9956 static void
9957 sparc32_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
9959 /* SPARC 32-bit trampoline:
9961 sethi %hi(fn), %g1
9962 sethi %hi(static), %g2
9963 jmp %g1+%lo(fn)
9964 or %g2, %lo(static), %g2
9966 SETHI i,r = 00rr rrr1 00ii iiii iiii iiii iiii iiii
9967 JMPL r+i,d = 10dd ddd1 1100 0rrr rr1i iiii iiii iiii
9970 emit_move_insn
9971 (adjust_address (m_tramp, SImode, 0),
9972 expand_binop (SImode, ior_optab,
9973 expand_shift (RSHIFT_EXPR, SImode, fnaddr, 10, 0, 1),
9974 GEN_INT (trunc_int_for_mode (0x03000000, SImode)),
9975 NULL_RTX, 1, OPTAB_DIRECT));
9977 emit_move_insn
9978 (adjust_address (m_tramp, SImode, 4),
9979 expand_binop (SImode, ior_optab,
9980 expand_shift (RSHIFT_EXPR, SImode, cxt, 10, 0, 1),
9981 GEN_INT (trunc_int_for_mode (0x05000000, SImode)),
9982 NULL_RTX, 1, OPTAB_DIRECT));
9984 emit_move_insn
9985 (adjust_address (m_tramp, SImode, 8),
9986 expand_binop (SImode, ior_optab,
9987 expand_and (SImode, fnaddr, GEN_INT (0x3ff), NULL_RTX),
9988 GEN_INT (trunc_int_for_mode (0x81c06000, SImode)),
9989 NULL_RTX, 1, OPTAB_DIRECT));
9991 emit_move_insn
9992 (adjust_address (m_tramp, SImode, 12),
9993 expand_binop (SImode, ior_optab,
9994 expand_and (SImode, cxt, GEN_INT (0x3ff), NULL_RTX),
9995 GEN_INT (trunc_int_for_mode (0x8410a000, SImode)),
9996 NULL_RTX, 1, OPTAB_DIRECT));
9998 emit_insn
9999 (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 0))));
10001 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
10002 aligned on a 16 byte boundary so one flush clears it all. */
10003 if (sparc_cpu != PROCESSOR_ULTRASPARC
10004 && sparc_cpu != PROCESSOR_ULTRASPARC3
10005 && sparc_cpu != PROCESSOR_NIAGARA
10006 && sparc_cpu != PROCESSOR_NIAGARA2
10007 && sparc_cpu != PROCESSOR_NIAGARA3
10008 && sparc_cpu != PROCESSOR_NIAGARA4
10009 && sparc_cpu != PROCESSOR_NIAGARA7
10010 && sparc_cpu != PROCESSOR_M8)
10011 emit_insn
10012 (gen_flush (SImode, validize_mem (adjust_address (m_tramp, SImode, 8))));
10014 /* Call __enable_execute_stack after writing onto the stack to make sure
10015 the stack address is accessible. */
10016 #ifdef HAVE_ENABLE_EXECUTE_STACK
10017 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10018 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10019 #endif
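/* For illustration, the kind of source that exercises this: taking the
   address of a GNU C nested function, e.g.

	int
	outer (int x)
	{
	  int inner (int y) { return x + y; }
	  int (*fp) (int) = inner;
	  return fp (1);
	}

   Materializing `fp' forces a trampoline onto the stack; the four
   stores above splice the address of `inner' and the static chain
   into the sethi/or immediates.  */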
10023 /* The 64-bit version is simpler because it makes more sense to load the
10024 values as "immediate" data out of the trampoline. It's also easier since
10025 we can read the PC without clobbering a register. */
10027 static void
10028 sparc64_initialize_trampoline (rtx m_tramp, rtx fnaddr, rtx cxt)
10030 /* SPARC 64-bit trampoline:
10032 rd %pc, %g1
10033 ldx [%g1+24], %g5
10034 jmp %g5
10035 ldx [%g1+16], %g5
10036 +16 bytes data
10039 emit_move_insn (adjust_address (m_tramp, SImode, 0),
10040 GEN_INT (trunc_int_for_mode (0x83414000, SImode)));
10041 emit_move_insn (adjust_address (m_tramp, SImode, 4),
10042 GEN_INT (trunc_int_for_mode (0xca586018, SImode)));
10043 emit_move_insn (adjust_address (m_tramp, SImode, 8),
10044 GEN_INT (trunc_int_for_mode (0x81c14000, SImode)));
10045 emit_move_insn (adjust_address (m_tramp, SImode, 12),
10046 GEN_INT (trunc_int_for_mode (0xca586010, SImode)));
10047 emit_move_insn (adjust_address (m_tramp, DImode, 16), cxt);
10048 emit_move_insn (adjust_address (m_tramp, DImode, 24), fnaddr);
10049 emit_insn
10050 (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 0))));
10052 /* On UltraSPARC a flush flushes an entire cache line. The trampoline is
10053 aligned on a 16 byte boundary so one flush clears it all. */
10054 if (sparc_cpu != PROCESSOR_ULTRASPARC
10055 && sparc_cpu != PROCESSOR_ULTRASPARC3
10056 && sparc_cpu != PROCESSOR_NIAGARA
10057 && sparc_cpu != PROCESSOR_NIAGARA2
10058 && sparc_cpu != PROCESSOR_NIAGARA3
10059 && sparc_cpu != PROCESSOR_NIAGARA4
10060 && sparc_cpu != PROCESSOR_NIAGARA7
10061 && sparc_cpu != PROCESSOR_M8)
10062 emit_insn
10063 (gen_flush (DImode, validize_mem (adjust_address (m_tramp, DImode, 8))));
10065 /* Call __enable_execute_stack after writing onto the stack to make sure
10066 the stack address is accessible. */
10067 #ifdef HAVE_ENABLE_EXECUTE_STACK
10068 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
10069 LCT_NORMAL, VOIDmode, XEXP (m_tramp, 0), Pmode);
10070 #endif
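/* The resulting 32-byte block is code followed by data; in outline (a
   sketch, not a type the compiler uses):

	struct sparc64_trampoline
	{
	  unsigned int code[4];    -- the four fixed insns above
	  unsigned long cxt;       -- offset 16, the static chain
	  unsigned long fnaddr;    -- offset 24, the jump target
	};

   so, unlike the 32-bit variant, no address bits need to be folded
   into instruction immediates.  */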
10073 /* Worker for TARGET_TRAMPOLINE_INIT. */
10075 static void
10076 sparc_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
10078 rtx fnaddr = force_reg (Pmode, XEXP (DECL_RTL (fndecl), 0));
10079 cxt = force_reg (Pmode, cxt);
10080 if (TARGET_ARCH64)
10081 sparc64_initialize_trampoline (m_tramp, fnaddr, cxt);
10082 else
10083 sparc32_initialize_trampoline (m_tramp, fnaddr, cxt);
10086 /* Adjust the cost of a scheduling dependency. Return the new cost of
10087 a dependency LINK or INSN on DEP_INSN. COST is the current cost. */
10089 static int
10090 supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
10091 int cost)
10093 enum attr_type insn_type;
10095 if (recog_memoized (insn) < 0)
10096 return cost;
10098 insn_type = get_attr_type (insn);
10100 if (dep_type == 0)
10102 /* Data dependency; DEP_INSN writes a register that INSN reads some
10103 cycles later. */
10105 /* If a load, then the dependence must be on the memory address;
10106 add an extra "cycle". Note that the cost could be two cycles
10107 if the reg was written late in an instruction group; we cannot
10108 tell here. */
10109 if (insn_type == TYPE_LOAD || insn_type == TYPE_FPLOAD)
10110 return cost + 3;
10112 /* Get the delay only if the address of the store is the dependence. */
10113 if (insn_type == TYPE_STORE || insn_type == TYPE_FPSTORE)
10115 rtx pat = PATTERN (insn);
10116 rtx dep_pat = PATTERN (dep_insn);
10118 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10119 return cost; /* This should not happen! */
10121 /* The dependency between the two instructions was on the data that
10122 is being stored. Assume that this implies that the address of the
10123 store is not dependent. */
10124 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10125 return cost;
10127 return cost + 3; /* An approximation. */
10130 /* A shift instruction cannot receive its data from an instruction
10131 in the same cycle; add a one cycle penalty. */
10132 if (insn_type == TYPE_SHIFT)
10133 return cost + 3; /* Split before cascade into shift. */
10135 else
10137 /* Anti- or output- dependency; DEP_INSN reads/writes a register that
10138 INSN writes some cycles later. */
10140 /* These are only significant for the fpu unit; writing a fp reg before
10141 the fpu has finished with it stalls the processor. */
10143 /* Reusing an integer register causes no problems. */
10144 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10145 return 0;
10148 return cost;
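/* For example, given the true dependency

	add	%o1, %o2, %o3	! DEP_INSN writes %o3
	ld	[%o3 + 8], %o4	! INSN, a TYPE_LOAD reading %o3

   the returned cost is the default latency plus 3, encouraging the
   scheduler to hoist independent work between the address computation
   and the load.  */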
10151 static int
10152 hypersparc_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10153 int cost)
10155 enum attr_type insn_type, dep_type;
10156 rtx pat = PATTERN (insn);
10157 rtx dep_pat = PATTERN (dep_insn);
10159 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10160 return cost;
10162 insn_type = get_attr_type (insn);
10163 dep_type = get_attr_type (dep_insn);
10165 switch (dtype)
10167 case 0:
10168 /* Data dependency; DEP_INSN writes a register that INSN reads some
10169 cycles later. */
10171 switch (insn_type)
10173 case TYPE_STORE:
10174 case TYPE_FPSTORE:
10175 /* Get the delay iff the address of the store is the dependence. */
10176 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10177 return cost;
10179 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10180 return cost;
10181 return cost + 3;
10183 case TYPE_LOAD:
10184 case TYPE_SLOAD:
10185 case TYPE_FPLOAD:
10186 /* If a load, then the dependence must be on the memory address. If
10187 the addresses aren't equal, then it might be a false dependency. */
10188 if (dep_type == TYPE_STORE || dep_type == TYPE_FPSTORE)
10190 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET
10191 || GET_CODE (SET_DEST (dep_pat)) != MEM
10192 || GET_CODE (SET_SRC (pat)) != MEM
10193 || ! rtx_equal_p (XEXP (SET_DEST (dep_pat), 0),
10194 XEXP (SET_SRC (pat), 0)))
10195 return cost + 2;
10197 return cost + 8;
10199 break;
10201 case TYPE_BRANCH:
10202 /* Compare to branch latency is 0. There is no benefit from
10203 separating compare and branch. */
10204 if (dep_type == TYPE_COMPARE)
10205 return 0;
10206 /* Floating point compare to branch latency is less than
10207 compare to conditional move. */
10208 if (dep_type == TYPE_FPCMP)
10209 return cost - 1;
10210 break;
10211 default:
10212 break;
10214 break;
10216 case REG_DEP_ANTI:
10217 /* Anti-dependencies only penalize the fpu unit. */
10218 if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
10219 return 0;
10220 break;
10222 default:
10223 break;
10226 return cost;
10229 static int
10230 leon5_adjust_cost (rtx_insn *insn, int dtype, rtx_insn *dep_insn,
10231 int cost)
10233 enum attr_type insn_type, dep_type;
10234 rtx pat = PATTERN (insn);
10235 rtx dep_pat = PATTERN (dep_insn);
10237 if (recog_memoized (insn) < 0 || recog_memoized (dep_insn) < 0)
10238 return cost;
10240 insn_type = get_attr_type (insn);
10241 dep_type = get_attr_type (dep_insn);
10243 switch (dtype)
10245 case REG_DEP_TRUE:
10246 /* Data dependency; DEP_INSN writes a register that INSN reads some
10247 cycles later. */
10249 switch (insn_type)
10251 case TYPE_STORE:
10252 /* Try to schedule three instructions between the store and
10253 the ALU instruction that generated the data. */
10254 if (dep_type == TYPE_IALU || dep_type == TYPE_SHIFT)
10256 if (GET_CODE (pat) != SET || GET_CODE (dep_pat) != SET)
10257 break;
10259 if (rtx_equal_p (SET_DEST (dep_pat), SET_SRC (pat)))
10260 return 4;
10262 break;
10263 default:
10264 break;
10266 break;
10267 case REG_DEP_ANTI:
10268 /* Penalize anti-dependencies for FPU instructions. */
10269 if (fpop_insn_p (insn) || insn_type == TYPE_FPLOAD)
10270 return 4;
10271 break;
10272 default:
10273 break;
10276 return cost;
10279 static int
10280 sparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
10281 unsigned int)
10283 switch (sparc_cpu)
10285 case PROCESSOR_LEON5:
10286 cost = leon5_adjust_cost (insn, dep_type, dep, cost);
10287 break;
10288 case PROCESSOR_SUPERSPARC:
10289 cost = supersparc_adjust_cost (insn, dep_type, dep, cost);
10290 break;
10291 case PROCESSOR_HYPERSPARC:
10292 case PROCESSOR_SPARCLITE86X:
10293 cost = hypersparc_adjust_cost (insn, dep_type, dep, cost);
10294 break;
10295 default:
10296 break;
10298 return cost;
10301 static void
10302 sparc_sched_init (FILE *dump ATTRIBUTE_UNUSED,
10303 int sched_verbose ATTRIBUTE_UNUSED,
10304 int max_ready ATTRIBUTE_UNUSED)
10307 static int
10308 sparc_use_sched_lookahead (void)
10310 switch (sparc_cpu)
10312 case PROCESSOR_ULTRASPARC:
10313 case PROCESSOR_ULTRASPARC3:
10314 return 4;
10315 case PROCESSOR_SUPERSPARC:
10316 case PROCESSOR_HYPERSPARC:
10317 case PROCESSOR_SPARCLITE86X:
10318 return 3;
10319 case PROCESSOR_NIAGARA4:
10320 case PROCESSOR_NIAGARA7:
10321 case PROCESSOR_M8:
10322 return 2;
10323 case PROCESSOR_NIAGARA:
10324 case PROCESSOR_NIAGARA2:
10325 case PROCESSOR_NIAGARA3:
10326 default:
10327 return 0;
10331 static int
10332 sparc_issue_rate (void)
10334 switch (sparc_cpu)
10336 case PROCESSOR_ULTRASPARC:
10337 case PROCESSOR_ULTRASPARC3:
10338 case PROCESSOR_M8:
10339 return 4;
10340 case PROCESSOR_SUPERSPARC:
10341 return 3;
10342 case PROCESSOR_HYPERSPARC:
10343 case PROCESSOR_SPARCLITE86X:
10344 case PROCESSOR_V9:
10345 /* Assume V9 processors are capable of at least dual-issue. */
10346 case PROCESSOR_NIAGARA4:
10347 case PROCESSOR_NIAGARA7:
10348 return 2;
10349 case PROCESSOR_NIAGARA:
10350 case PROCESSOR_NIAGARA2:
10351 case PROCESSOR_NIAGARA3:
10352 default:
10353 return 1;
10357 int
10358 sparc_branch_cost (bool speed_p, bool predictable_p)
10360 if (!speed_p)
10361 return 2;
10363 /* For pre-V9 processors we use a single value (usually 3) to take into
10364 account the potential annulling of the delay slot (which ends up being
10365 a bubble in the pipeline) plus a cycle to take into consideration
10366 the instruction cache effects.
10368 On V9 and later processors, which have branch prediction facilities,
10369 we take into account whether the branch is (easily) predictable. */
10370 const int cost = sparc_costs->branch_cost;
10372 switch (sparc_cpu)
10374 case PROCESSOR_V9:
10375 case PROCESSOR_ULTRASPARC:
10376 case PROCESSOR_ULTRASPARC3:
10377 case PROCESSOR_NIAGARA:
10378 case PROCESSOR_NIAGARA2:
10379 case PROCESSOR_NIAGARA3:
10380 case PROCESSOR_NIAGARA4:
10381 case PROCESSOR_NIAGARA7:
10382 case PROCESSOR_M8:
10383 return cost + (predictable_p ? 0 : 2);
10385 default:
10386 return cost;
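/* Concretely: if the active cost table sets branch_cost to B, then on
   the V9 processors listed above a predictable branch costs B and a
   hard-to-predict one costs B + 2, while with !speed_p (e.g. -Os)
   every branch is costed at 2 regardless of CPU.  */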
10390 static int
10391 set_extends (rtx_insn *insn)
10393 rtx pat = PATTERN (insn);
10395 switch (GET_CODE (SET_SRC (pat)))
10397 /* Load and some shift instructions zero extend. */
10398 case MEM:
10399 case ZERO_EXTEND:
10400 /* sethi clears the high bits */
10401 case HIGH:
10402 /* LO_SUM is used with sethi. sethi cleared the high
10403 bits and the values used with lo_sum are positive */
10404 case LO_SUM:
10405 /* Store flag stores 0 or 1 */
10406 case LT: case LTU:
10407 case GT: case GTU:
10408 case LE: case LEU:
10409 case GE: case GEU:
10410 case EQ:
10411 case NE:
10412 return 1;
10413 case AND:
10415 rtx op0 = XEXP (SET_SRC (pat), 0);
10416 rtx op1 = XEXP (SET_SRC (pat), 1);
10417 if (GET_CODE (op1) == CONST_INT)
10418 return INTVAL (op1) >= 0;
10419 if (GET_CODE (op0) != REG)
10420 return 0;
10421 if (sparc_check_64 (op0, insn) == 1)
10422 return 1;
10423 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10425 case IOR:
10426 case XOR:
10428 rtx op0 = XEXP (SET_SRC (pat), 0);
10429 rtx op1 = XEXP (SET_SRC (pat), 1);
10430 if (GET_CODE (op0) != REG || sparc_check_64 (op0, insn) <= 0)
10431 return 0;
10432 if (GET_CODE (op1) == CONST_INT)
10433 return INTVAL (op1) >= 0;
10434 return (GET_CODE (op1) == REG && sparc_check_64 (op1, insn) == 1);
10436 case LSHIFTRT:
10437 return GET_MODE (SET_SRC (pat)) == SImode;
10438 /* Positive integers leave the high bits zero. */
10439 case CONST_INT:
10440 return !(INTVAL (SET_SRC (pat)) & 0x80000000);
10441 case ASHIFTRT:
10442 case SIGN_EXTEND:
10443 return - (GET_MODE (SET_SRC (pat)) == SImode);
10444 case REG:
10445 return sparc_check_64 (SET_SRC (pat), insn);
10446 default:
10447 return 0;
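/* A few examples of the classification above:

	(set (reg:SI) (mem:SI ...))			-> 1, loads zero extend
	(set (reg:SI) (and:SI (reg) (const_int 255)))	-> 1, positive mask
	(set (reg:SI) (ashiftrt:SI ...))		-> -1, sign extended
	(set (reg:SI) (plus:SI ...))			-> 0, unknown

   matching the 1 / -1 / 0 convention documented for sparc_check_64
   below.  */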
10451 /* We _ought_ to have only one kind per function, but... */
10452 static GTY(()) rtx sparc_addr_diff_list;
10453 static GTY(()) rtx sparc_addr_list;
10455 void
10456 sparc_defer_case_vector (rtx lab, rtx vec, int diff)
10458 vec = gen_rtx_EXPR_LIST (VOIDmode, lab, vec);
10459 if (diff)
10460 sparc_addr_diff_list
10461 = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_diff_list);
10462 else
10463 sparc_addr_list = gen_rtx_EXPR_LIST (VOIDmode, vec, sparc_addr_list);
10466 static void
10467 sparc_output_addr_vec (rtx vec)
10469 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10470 int idx, vlen = XVECLEN (body, 0);
10472 #ifdef ASM_OUTPUT_ADDR_VEC_START
10473 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10474 #endif
10476 #ifdef ASM_OUTPUT_CASE_LABEL
10477 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10478 NEXT_INSN (lab));
10479 #else
10480 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10481 #endif
10483 for (idx = 0; idx < vlen; idx++)
10485 ASM_OUTPUT_ADDR_VEC_ELT
10486 (asm_out_file, CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 0, idx), 0)));
10489 #ifdef ASM_OUTPUT_ADDR_VEC_END
10490 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10491 #endif
10494 static void
10495 sparc_output_addr_diff_vec (rtx vec)
10497 rtx lab = XEXP (vec, 0), body = XEXP (vec, 1);
10498 rtx base = XEXP (XEXP (body, 0), 0);
10499 int idx, vlen = XVECLEN (body, 1);
10501 #ifdef ASM_OUTPUT_ADDR_VEC_START
10502 ASM_OUTPUT_ADDR_VEC_START (asm_out_file);
10503 #endif
10505 #ifdef ASM_OUTPUT_CASE_LABEL
10506 ASM_OUTPUT_CASE_LABEL (asm_out_file, "L", CODE_LABEL_NUMBER (lab),
10507 NEXT_INSN (lab));
10508 #else
10509 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (lab));
10510 #endif
10512 for (idx = 0; idx < vlen; idx++)
10514 ASM_OUTPUT_ADDR_DIFF_ELT
10515 (asm_out_file,
10516 body,
10517 CODE_LABEL_NUMBER (XEXP (XVECEXP (body, 1, idx), 0)),
10518 CODE_LABEL_NUMBER (base));
10521 #ifdef ASM_OUTPUT_ADDR_VEC_END
10522 ASM_OUTPUT_ADDR_VEC_END (asm_out_file);
10523 #endif
10526 static void
10527 sparc_output_deferred_case_vectors (void)
10529 rtx t;
10530 int align;
10532 if (sparc_addr_list == NULL_RTX
10533 && sparc_addr_diff_list == NULL_RTX)
10534 return;
10536 /* Align to cache line in the function's code section. */
10537 switch_to_section (current_function_section ());
10539 align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
10540 if (align > 0)
10541 ASM_OUTPUT_ALIGN (asm_out_file, align);
10543 for (t = sparc_addr_list; t ; t = XEXP (t, 1))
10544 sparc_output_addr_vec (XEXP (t, 0));
10545 for (t = sparc_addr_diff_list; t ; t = XEXP (t, 1))
10546 sparc_output_addr_diff_vec (XEXP (t, 0));
10548 sparc_addr_list = sparc_addr_diff_list = NULL_RTX;
10551 /* Return 0 if the high 32 bits of X (the low word of X, if DImode) are
10552 unknown. Return 1 if the high bits are zero, -1 if the register is
10553 sign extended. */
10554 int
10555 sparc_check_64 (rtx x, rtx_insn *insn)
10557 /* If a register is set only once it is safe to ignore insns this
10558 code does not know how to handle. The loop will either recognize
10559 the single set and return the correct value or fail to recognize
10560 it and return 0. */
10561 int set_once = 0;
10562 rtx y = x;
10564 gcc_assert (GET_CODE (x) == REG);
10566 if (GET_MODE (x) == DImode)
10567 y = gen_rtx_REG (SImode, REGNO (x) + WORDS_BIG_ENDIAN);
10569 if (flag_expensive_optimizations
10570 && df && DF_REG_DEF_COUNT (REGNO (y)) == 1)
10571 set_once = 1;
10573 if (insn == 0)
10575 if (set_once)
10576 insn = get_last_insn_anywhere ();
10577 else
10578 return 0;
10581 while ((insn = PREV_INSN (insn)))
10583 switch (GET_CODE (insn))
10585 case JUMP_INSN:
10586 case NOTE:
10587 break;
10588 case CODE_LABEL:
10589 case CALL_INSN:
10590 default:
10591 if (! set_once)
10592 return 0;
10593 break;
10594 case INSN:
10596 rtx pat = PATTERN (insn);
10597 if (GET_CODE (pat) != SET)
10598 return 0;
10599 if (rtx_equal_p (x, SET_DEST (pat)))
10600 return set_extends (insn);
10601 if (y && rtx_equal_p (y, SET_DEST (pat)))
10602 return set_extends (insn);
10603 if (reg_overlap_mentioned_p (SET_DEST (pat), y))
10604 return 0;
10608 return 0;
10611 /* Output a wide shift instruction in V8+ mode. INSN is the instruction,
10612 OPERANDS are its operands and OPCODE is the mnemonic to be used. */
10614 const char *
10615 output_v8plus_shift (rtx_insn *insn, rtx *operands, const char *opcode)
10617 static char asm_code[60];
10619 /* The scratch register is only required when the destination
10620 register is not a 64-bit global or out register. */
10621 if (which_alternative != 2)
10622 operands[3] = operands[0];
10624 /* We can only shift by constants <= 63. */
10625 if (GET_CODE (operands[2]) == CONST_INT)
10626 operands[2] = GEN_INT (INTVAL (operands[2]) & 0x3f);
10628 if (GET_CODE (operands[1]) == CONST_INT)
10630 output_asm_insn ("mov\t%1, %3", operands);
10632 else
10634 output_asm_insn ("sllx\t%H1, 32, %3", operands);
10635 if (sparc_check_64 (operands[1], insn) <= 0)
10636 output_asm_insn ("srl\t%L1, 0, %L1", operands);
10637 output_asm_insn ("or\t%L1, %3, %3", operands);
10640 strcpy (asm_code, opcode);
10642 if (which_alternative != 2)
10643 return strcat (asm_code, "\t%0, %2, %L0\n\tsrlx\t%L0, 32, %H0");
10644 else
10645 return
10646 strcat (asm_code, "\t%3, %2, %3\n\tsrlx\t%3, 32, %H0\n\tmov\t%3, %L0");
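/* For example, a 64-bit left shift with OPCODE "sllx" in the first
   alternative comes out roughly as

	sllx	%H1, 32, %0	! high word into the 64-bit scratch
	srl	%L1, 0, %L1	! clear upper half unless known zero
	or	%L1, %0, %0	! whole value in a single register
	sllx	%0, %2, %L0	! the shift proper
	srlx	%L0, 32, %H0	! split back into the %H/%L pair

   i.e. the V8+ idiom of doing the arithmetic in one 64-bit register
   and splitting the result afterwards.  */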
10649 /* Output rtl to increment the profiler label LABELNO
10650 for profiling a function entry. */
10652 void
10653 sparc_profile_hook (int labelno)
10655 char buf[32];
10656 rtx lab, fun;
10658 fun = gen_rtx_SYMBOL_REF (Pmode, MCOUNT_FUNCTION);
10659 if (NO_PROFILE_COUNTERS)
10661 emit_library_call (fun, LCT_NORMAL, VOIDmode);
10663 else
10665 ASM_GENERATE_INTERNAL_LABEL (buf, "LP", labelno);
10666 lab = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (buf));
10667 emit_library_call (fun, LCT_NORMAL, VOIDmode, lab, Pmode);
10671 #ifdef TARGET_SOLARIS
10672 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
10674 static void
10675 sparc_solaris_elf_asm_named_section (const char *name, unsigned int flags,
10676 tree decl ATTRIBUTE_UNUSED)
10678 if (HAVE_COMDAT_GROUP && flags & SECTION_LINKONCE)
10680 solaris_elf_asm_comdat_section (name, flags, decl);
10681 return;
10684 fprintf (asm_out_file, "\t.section\t\"%s\"", name);
10686 if (!(flags & SECTION_DEBUG))
10687 fputs (",#alloc", asm_out_file);
10688 #if HAVE_GAS_SECTION_EXCLUDE
10689 if (flags & SECTION_EXCLUDE)
10690 fputs (",#exclude", asm_out_file);
10691 #endif
10692 if (flags & SECTION_WRITE)
10693 fputs (",#write", asm_out_file);
10694 if (flags & SECTION_TLS)
10695 fputs (",#tls", asm_out_file);
10696 if (flags & SECTION_CODE)
10697 fputs (",#execinstr", asm_out_file);
10699 if (flags & SECTION_NOTYPE)
10700 ;
10701 else if (flags & SECTION_BSS)
10702 fputs (",#nobits", asm_out_file);
10703 else
10704 fputs (",#progbits", asm_out_file);
10706 fputc ('\n', asm_out_file);
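/* For example, a writable TLS data section comes out as

	.section	".tdata",#alloc,#write,#tls,#progbits

   using the Solaris '#' flag syntax rather than the GNU as letter
   flags.  */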
10708 #endif /* TARGET_SOLARIS */
10710 /* We do not allow indirect calls to be optimized into sibling calls.
10712 We cannot use sibling calls when delayed branches are disabled
10713 because they will likely require the call delay slot to be filled.
10715 Also, on SPARC 32-bit we cannot emit a sibling call when the
10716 current function returns a structure. This is because the "unimp
10717 after call" convention would cause the callee to return to the
10718 wrong place. The generic code already disallows cases where the
10719 function being called returns a structure.
10721 It may seem strange how this last case could occur. Usually there
10722 is code after the call which jumps to epilogue code which dumps the
10723 return value into the struct return area. That ought to invalidate
10724 the sibling call, right? Well, in the C++ case we can end up passing
10725 the pointer to the struct return area to a constructor (which returns
10726 void) and then nothing else happens. Such a sibling call would look
10727 valid without the added check here.
10729 VxWorks PIC PLT entries require the global pointer to be initialized
10730 on entry. We therefore can't emit sibling calls to them. */
10731 static bool
10732 sparc_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10734 return (decl
10735 && flag_delayed_branch
10736 && (TARGET_ARCH64 || ! cfun->returns_struct)
10737 && !(TARGET_VXWORKS_RTP
10738 && flag_pic
10739 && !targetm.binds_local_p (decl)));
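/* For illustration, the `decl' test above is what rejects indirect
   calls:

	extern void target (void);

	void direct (void) { target (); }		-- may become a sibcall
	void indirect (void (*fp) (void)) { fp (); }	-- never does

   while the remaining tests cover disabled delay slots, 32-bit
   struct-returning callers and VxWorks RTP PIC callees.  */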
10742 /* libfunc renaming. */
10744 static void
10745 sparc_init_libfuncs (void)
10747 if (TARGET_ARCH32)
10749 /* Use the subroutines that Sun's library provides for integer
10750 multiply and divide. The `*' prevents an underscore from
10751 being prepended by the compiler. .umul is a little faster
10752 than .mul. */
10753 set_optab_libfunc (smul_optab, SImode, "*.umul");
10754 set_optab_libfunc (sdiv_optab, SImode, "*.div");
10755 set_optab_libfunc (udiv_optab, SImode, "*.udiv");
10756 set_optab_libfunc (smod_optab, SImode, "*.rem");
10757 set_optab_libfunc (umod_optab, SImode, "*.urem");
10759 /* TFmode arithmetic. These names are part of the SPARC 32bit ABI. */
10760 set_optab_libfunc (add_optab, TFmode, "_Q_add");
10761 set_optab_libfunc (sub_optab, TFmode, "_Q_sub");
10762 set_optab_libfunc (neg_optab, TFmode, "_Q_neg");
10763 set_optab_libfunc (smul_optab, TFmode, "_Q_mul");
10764 set_optab_libfunc (sdiv_optab, TFmode, "_Q_div");
10766 /* We can define the TFmode sqrt optab only if TARGET_FPU. This
10767 is because with soft-float, the SFmode and DFmode sqrt
10768 instructions will be absent, and the compiler will notice and
10769 try to use the TFmode sqrt instruction for calls to the
10770 builtin function sqrt, but this fails. */
10771 if (TARGET_FPU)
10772 set_optab_libfunc (sqrt_optab, TFmode, "_Q_sqrt");
10774 set_optab_libfunc (eq_optab, TFmode, "_Q_feq");
10775 set_optab_libfunc (ne_optab, TFmode, "_Q_fne");
10776 set_optab_libfunc (gt_optab, TFmode, "_Q_fgt");
10777 set_optab_libfunc (ge_optab, TFmode, "_Q_fge");
10778 set_optab_libfunc (lt_optab, TFmode, "_Q_flt");
10779 set_optab_libfunc (le_optab, TFmode, "_Q_fle");
10781 set_conv_libfunc (sext_optab, TFmode, SFmode, "_Q_stoq");
10782 set_conv_libfunc (sext_optab, TFmode, DFmode, "_Q_dtoq");
10783 set_conv_libfunc (trunc_optab, SFmode, TFmode, "_Q_qtos");
10784 set_conv_libfunc (trunc_optab, DFmode, TFmode, "_Q_qtod");
10786 set_conv_libfunc (sfix_optab, SImode, TFmode, "_Q_qtoi");
10787 set_conv_libfunc (ufix_optab, SImode, TFmode, "_Q_qtou");
10788 set_conv_libfunc (sfloat_optab, TFmode, SImode, "_Q_itoq");
10789 set_conv_libfunc (ufloat_optab, TFmode, SImode, "_Q_utoq");
10791 if (DITF_CONVERSION_LIBFUNCS)
10793 set_conv_libfunc (sfix_optab, DImode, TFmode, "_Q_qtoll");
10794 set_conv_libfunc (ufix_optab, DImode, TFmode, "_Q_qtoull");
10795 set_conv_libfunc (sfloat_optab, TFmode, DImode, "_Q_lltoq");
10796 set_conv_libfunc (ufloat_optab, TFmode, DImode, "_Q_ulltoq");
10799 if (SUN_CONVERSION_LIBFUNCS)
10801 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftoll");
10802 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoull");
10803 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtoll");
10804 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoull");
10807 if (TARGET_ARCH64)
10809 /* In the SPARC 64bit ABI, SImode multiply and divide functions
10810 do not exist in the library. Make sure the compiler does not
10811 emit calls to them by accident. (It should always use the
10812 hardware instructions.) */
10813 set_optab_libfunc (smul_optab, SImode, 0);
10814 set_optab_libfunc (sdiv_optab, SImode, 0);
10815 set_optab_libfunc (udiv_optab, SImode, 0);
10816 set_optab_libfunc (smod_optab, SImode, 0);
10817 set_optab_libfunc (umod_optab, SImode, 0);
10819 if (SUN_INTEGER_MULTIPLY_64)
10821 set_optab_libfunc (smul_optab, DImode, "__mul64");
10822 set_optab_libfunc (sdiv_optab, DImode, "__div64");
10823 set_optab_libfunc (udiv_optab, DImode, "__udiv64");
10824 set_optab_libfunc (smod_optab, DImode, "__rem64");
10825 set_optab_libfunc (umod_optab, DImode, "__urem64");
10828 if (SUN_CONVERSION_LIBFUNCS)
10830 set_conv_libfunc (sfix_optab, DImode, SFmode, "__ftol");
10831 set_conv_libfunc (ufix_optab, DImode, SFmode, "__ftoul");
10832 set_conv_libfunc (sfix_optab, DImode, DFmode, "__dtol");
10833 set_conv_libfunc (ufix_optab, DImode, DFmode, "__dtoul");
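/* The net effect on a 32-bit Sun target is that, for example,

	long q (long a, long b) { return a / b; }

   calls .div instead of the default libgcc __divsi3, and long double
   arithmetic becomes calls to the _Q_* routines mandated by the
   32-bit ABI.  */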
10838 /* SPARC builtins. */
10839 enum sparc_builtins
10841 /* FPU builtins. */
10842 SPARC_BUILTIN_LDFSR,
10843 SPARC_BUILTIN_STFSR,
10845 /* VIS 1.0 builtins. */
10846 SPARC_BUILTIN_FPACK16,
10847 SPARC_BUILTIN_FPACK32,
10848 SPARC_BUILTIN_FPACKFIX,
10849 SPARC_BUILTIN_FEXPAND,
10850 SPARC_BUILTIN_FPMERGE,
10851 SPARC_BUILTIN_FMUL8X16,
10852 SPARC_BUILTIN_FMUL8X16AU,
10853 SPARC_BUILTIN_FMUL8X16AL,
10854 SPARC_BUILTIN_FMUL8SUX16,
10855 SPARC_BUILTIN_FMUL8ULX16,
10856 SPARC_BUILTIN_FMULD8SUX16,
10857 SPARC_BUILTIN_FMULD8ULX16,
10858 SPARC_BUILTIN_FALIGNDATAV4HI,
10859 SPARC_BUILTIN_FALIGNDATAV8QI,
10860 SPARC_BUILTIN_FALIGNDATAV2SI,
10861 SPARC_BUILTIN_FALIGNDATADI,
10862 SPARC_BUILTIN_WRGSR,
10863 SPARC_BUILTIN_RDGSR,
10864 SPARC_BUILTIN_ALIGNADDR,
10865 SPARC_BUILTIN_ALIGNADDRL,
10866 SPARC_BUILTIN_PDIST,
10867 SPARC_BUILTIN_EDGE8,
10868 SPARC_BUILTIN_EDGE8L,
10869 SPARC_BUILTIN_EDGE16,
10870 SPARC_BUILTIN_EDGE16L,
10871 SPARC_BUILTIN_EDGE32,
10872 SPARC_BUILTIN_EDGE32L,
10873 SPARC_BUILTIN_FCMPLE16,
10874 SPARC_BUILTIN_FCMPLE32,
10875 SPARC_BUILTIN_FCMPNE16,
10876 SPARC_BUILTIN_FCMPNE32,
10877 SPARC_BUILTIN_FCMPGT16,
10878 SPARC_BUILTIN_FCMPGT32,
10879 SPARC_BUILTIN_FCMPEQ16,
10880 SPARC_BUILTIN_FCMPEQ32,
10881 SPARC_BUILTIN_FPADD16,
10882 SPARC_BUILTIN_FPADD16S,
10883 SPARC_BUILTIN_FPADD32,
10884 SPARC_BUILTIN_FPADD32S,
10885 SPARC_BUILTIN_FPSUB16,
10886 SPARC_BUILTIN_FPSUB16S,
10887 SPARC_BUILTIN_FPSUB32,
10888 SPARC_BUILTIN_FPSUB32S,
10889 SPARC_BUILTIN_ARRAY8,
10890 SPARC_BUILTIN_ARRAY16,
10891 SPARC_BUILTIN_ARRAY32,
10893 /* VIS 2.0 builtins. */
10894 SPARC_BUILTIN_EDGE8N,
10895 SPARC_BUILTIN_EDGE8LN,
10896 SPARC_BUILTIN_EDGE16N,
10897 SPARC_BUILTIN_EDGE16LN,
10898 SPARC_BUILTIN_EDGE32N,
10899 SPARC_BUILTIN_EDGE32LN,
10900 SPARC_BUILTIN_BMASK,
10901 SPARC_BUILTIN_BSHUFFLEV4HI,
10902 SPARC_BUILTIN_BSHUFFLEV8QI,
10903 SPARC_BUILTIN_BSHUFFLEV2SI,
10904 SPARC_BUILTIN_BSHUFFLEDI,
10906 /* VIS 3.0 builtins. */
10907 SPARC_BUILTIN_CMASK8,
10908 SPARC_BUILTIN_CMASK16,
10909 SPARC_BUILTIN_CMASK32,
10910 SPARC_BUILTIN_FCHKSM16,
10911 SPARC_BUILTIN_FSLL16,
10912 SPARC_BUILTIN_FSLAS16,
10913 SPARC_BUILTIN_FSRL16,
10914 SPARC_BUILTIN_FSRA16,
10915 SPARC_BUILTIN_FSLL32,
10916 SPARC_BUILTIN_FSLAS32,
10917 SPARC_BUILTIN_FSRL32,
10918 SPARC_BUILTIN_FSRA32,
10919 SPARC_BUILTIN_PDISTN,
10920 SPARC_BUILTIN_FMEAN16,
10921 SPARC_BUILTIN_FPADD64,
10922 SPARC_BUILTIN_FPSUB64,
10923 SPARC_BUILTIN_FPADDS16,
10924 SPARC_BUILTIN_FPADDS16S,
10925 SPARC_BUILTIN_FPSUBS16,
10926 SPARC_BUILTIN_FPSUBS16S,
10927 SPARC_BUILTIN_FPADDS32,
10928 SPARC_BUILTIN_FPADDS32S,
10929 SPARC_BUILTIN_FPSUBS32,
10930 SPARC_BUILTIN_FPSUBS32S,
10931 SPARC_BUILTIN_FUCMPLE8,
10932 SPARC_BUILTIN_FUCMPNE8,
10933 SPARC_BUILTIN_FUCMPGT8,
10934 SPARC_BUILTIN_FUCMPEQ8,
10935 SPARC_BUILTIN_FHADDS,
10936 SPARC_BUILTIN_FHADDD,
10937 SPARC_BUILTIN_FHSUBS,
10938 SPARC_BUILTIN_FHSUBD,
10939 SPARC_BUILTIN_FNHADDS,
10940 SPARC_BUILTIN_FNHADDD,
10941 SPARC_BUILTIN_UMULXHI,
10942 SPARC_BUILTIN_XMULX,
10943 SPARC_BUILTIN_XMULXHI,
10945 /* VIS 4.0 builtins. */
10946 SPARC_BUILTIN_FPADD8,
10947 SPARC_BUILTIN_FPADDS8,
10948 SPARC_BUILTIN_FPADDUS8,
10949 SPARC_BUILTIN_FPADDUS16,
10950 SPARC_BUILTIN_FPCMPLE8,
10951 SPARC_BUILTIN_FPCMPGT8,
10952 SPARC_BUILTIN_FPCMPULE16,
10953 SPARC_BUILTIN_FPCMPUGT16,
10954 SPARC_BUILTIN_FPCMPULE32,
10955 SPARC_BUILTIN_FPCMPUGT32,
10956 SPARC_BUILTIN_FPMAX8,
10957 SPARC_BUILTIN_FPMAX16,
10958 SPARC_BUILTIN_FPMAX32,
10959 SPARC_BUILTIN_FPMAXU8,
10960 SPARC_BUILTIN_FPMAXU16,
10961 SPARC_BUILTIN_FPMAXU32,
10962 SPARC_BUILTIN_FPMIN8,
10963 SPARC_BUILTIN_FPMIN16,
10964 SPARC_BUILTIN_FPMIN32,
10965 SPARC_BUILTIN_FPMINU8,
10966 SPARC_BUILTIN_FPMINU16,
10967 SPARC_BUILTIN_FPMINU32,
10968 SPARC_BUILTIN_FPSUB8,
10969 SPARC_BUILTIN_FPSUBS8,
10970 SPARC_BUILTIN_FPSUBUS8,
10971 SPARC_BUILTIN_FPSUBUS16,
10973 /* VIS 4.0B builtins. */
10975 /* Note that all the DICTUNPACK* entries should be kept
10976 contiguous. */
10977 SPARC_BUILTIN_FIRST_DICTUNPACK,
10978 SPARC_BUILTIN_DICTUNPACK8 = SPARC_BUILTIN_FIRST_DICTUNPACK,
10979 SPARC_BUILTIN_DICTUNPACK16,
10980 SPARC_BUILTIN_DICTUNPACK32,
10981 SPARC_BUILTIN_LAST_DICTUNPACK = SPARC_BUILTIN_DICTUNPACK32,
10983 /* Note that all the FPCMP*SHL entries should be kept
10984 contiguous. */
10985 SPARC_BUILTIN_FIRST_FPCMPSHL,
10986 SPARC_BUILTIN_FPCMPLE8SHL = SPARC_BUILTIN_FIRST_FPCMPSHL,
10987 SPARC_BUILTIN_FPCMPGT8SHL,
10988 SPARC_BUILTIN_FPCMPEQ8SHL,
10989 SPARC_BUILTIN_FPCMPNE8SHL,
10990 SPARC_BUILTIN_FPCMPLE16SHL,
10991 SPARC_BUILTIN_FPCMPGT16SHL,
10992 SPARC_BUILTIN_FPCMPEQ16SHL,
10993 SPARC_BUILTIN_FPCMPNE16SHL,
10994 SPARC_BUILTIN_FPCMPLE32SHL,
10995 SPARC_BUILTIN_FPCMPGT32SHL,
10996 SPARC_BUILTIN_FPCMPEQ32SHL,
10997 SPARC_BUILTIN_FPCMPNE32SHL,
10998 SPARC_BUILTIN_FPCMPULE8SHL,
10999 SPARC_BUILTIN_FPCMPUGT8SHL,
11000 SPARC_BUILTIN_FPCMPULE16SHL,
11001 SPARC_BUILTIN_FPCMPUGT16SHL,
11002 SPARC_BUILTIN_FPCMPULE32SHL,
11003 SPARC_BUILTIN_FPCMPUGT32SHL,
11004 SPARC_BUILTIN_FPCMPDE8SHL,
11005 SPARC_BUILTIN_FPCMPDE16SHL,
11006 SPARC_BUILTIN_FPCMPDE32SHL,
11007 SPARC_BUILTIN_FPCMPUR8SHL,
11008 SPARC_BUILTIN_FPCMPUR16SHL,
11009 SPARC_BUILTIN_FPCMPUR32SHL,
11010 SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
11012 SPARC_BUILTIN_MAX
11015 static GTY (()) tree sparc_builtins[(int) SPARC_BUILTIN_MAX];
11016 static enum insn_code sparc_builtins_icode[(int) SPARC_BUILTIN_MAX];
11018 /* Return true if OPVAL can be used for operand OPNUM of instruction ICODE.
11019 The instruction should require a constant operand of some sort. The
11020 function prints an error if OPVAL is not valid. */
11022 static int
11023 check_constant_argument (enum insn_code icode, int opnum, rtx opval)
11025 if (GET_CODE (opval) != CONST_INT)
11027 error ("%qs expects a constant argument", insn_data[icode].name);
11028 return false;
11031 if (!(*insn_data[icode].operand[opnum].predicate) (opval, VOIDmode))
11033 error ("constant argument out of range for %qs", insn_data[icode].name);
11034 return false;
11036 return true;
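/* For instance, calling a builtin whose underlying instruction encodes
   an immediate, such as __builtin_vis_dictunpack8 (d, n) where `n' is
   a variable, trips the first test and reports "expects a constant
   argument"; a constant outside the predicate's range trips the
   second.  */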
11039 /* Add a SPARC builtin function with NAME, ICODE, CODE and TYPE. Return the
11040 function decl or NULL_TREE if the builtin was not added. */
11042 static tree
11043 def_builtin (const char *name, enum insn_code icode, enum sparc_builtins code,
11044 tree type)
11046 tree t
11047 = add_builtin_function (name, type, code, BUILT_IN_MD, NULL, NULL_TREE);
11049 if (t)
11051 sparc_builtins[code] = t;
11052 sparc_builtins_icode[code] = icode;
11055 return t;
11058 /* Likewise, but also marks the function as "const". */
11060 static tree
11061 def_builtin_const (const char *name, enum insn_code icode,
11062 enum sparc_builtins code, tree type)
11064 tree t = def_builtin (name, icode, code, type);
11066 if (t)
11067 TREE_READONLY (t) = 1;
11069 return t;
11072 /* Implement the TARGET_INIT_BUILTINS target hook.
11073 Create builtin functions for special SPARC instructions. */
11075 static void
11076 sparc_init_builtins (void)
11078 if (TARGET_FPU)
11079 sparc_fpu_init_builtins ();
11081 if (TARGET_VIS)
11082 sparc_vis_init_builtins ();
11085 /* Create builtin functions for FPU instructions. */
11087 static void
11088 sparc_fpu_init_builtins (void)
11090 tree ftype
11091 = build_function_type_list (void_type_node,
11092 build_pointer_type (unsigned_type_node), 0);
11093 def_builtin ("__builtin_load_fsr", CODE_FOR_ldfsr,
11094 SPARC_BUILTIN_LDFSR, ftype);
11095 def_builtin ("__builtin_store_fsr", CODE_FOR_stfsr,
11096 SPARC_BUILTIN_STFSR, ftype);
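/* Usage sketch -- both builtins take a pointer, per FTYPE above:

	unsigned int fsr;
	__builtin_store_fsr (&fsr);	-- stfsr: read %fsr to memory
	fsr &= ~0x1f;			-- e.g. clear the cexc field
	__builtin_load_fsr (&fsr);	-- ldfsr: write it back
   */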
11099 /* Create builtin functions for VIS instructions. */
11101 static void
11102 sparc_vis_init_builtins (void)
11104 tree v4qi = build_vector_type (unsigned_intQI_type_node, 4);
11105 tree v8qi = build_vector_type (unsigned_intQI_type_node, 8);
11106 tree v4hi = build_vector_type (intHI_type_node, 4);
11107 tree v2hi = build_vector_type (intHI_type_node, 2);
11108 tree v2si = build_vector_type (intSI_type_node, 2);
11109 tree v1si = build_vector_type (intSI_type_node, 1);
11111 tree v4qi_ftype_v4hi = build_function_type_list (v4qi, v4hi, 0);
11112 tree v8qi_ftype_v2si_v8qi = build_function_type_list (v8qi, v2si, v8qi, 0);
11113 tree v2hi_ftype_v2si = build_function_type_list (v2hi, v2si, 0);
11114 tree v4hi_ftype_v4qi = build_function_type_list (v4hi, v4qi, 0);
11115 tree v8qi_ftype_v4qi_v4qi = build_function_type_list (v8qi, v4qi, v4qi, 0);
11116 tree v4hi_ftype_v4qi_v4hi = build_function_type_list (v4hi, v4qi, v4hi, 0);
11117 tree v4hi_ftype_v4qi_v2hi = build_function_type_list (v4hi, v4qi, v2hi, 0);
11118 tree v2si_ftype_v4qi_v2hi = build_function_type_list (v2si, v4qi, v2hi, 0);
11119 tree v4hi_ftype_v8qi_v4hi = build_function_type_list (v4hi, v8qi, v4hi, 0);
11120 tree v4hi_ftype_v4hi_v4hi = build_function_type_list (v4hi, v4hi, v4hi, 0);
11121 tree v2si_ftype_v2si_v2si = build_function_type_list (v2si, v2si, v2si, 0);
11122 tree v8qi_ftype_v8qi_v8qi = build_function_type_list (v8qi, v8qi, v8qi, 0);
11123 tree v2hi_ftype_v2hi_v2hi = build_function_type_list (v2hi, v2hi, v2hi, 0);
11124 tree v1si_ftype_v1si_v1si = build_function_type_list (v1si, v1si, v1si, 0);
11125 tree di_ftype_v8qi_v8qi_di = build_function_type_list (intDI_type_node,
11126 v8qi, v8qi,
11127 intDI_type_node, 0);
11128 tree di_ftype_v8qi_v8qi = build_function_type_list (intDI_type_node,
11129 v8qi, v8qi, 0);
11130 tree si_ftype_v8qi_v8qi = build_function_type_list (intSI_type_node,
11131 v8qi, v8qi, 0);
11132 tree v8qi_ftype_df_si = build_function_type_list (v8qi, double_type_node,
11133 intSI_type_node, 0);
11134 tree v4hi_ftype_df_si = build_function_type_list (v4hi, double_type_node,
11135 intSI_type_node, 0);
11136 tree v2si_ftype_df_si = build_function_type_list (v2si, double_type_node,
10137 intSI_type_node, 0);
11138 tree di_ftype_di_di = build_function_type_list (intDI_type_node,
11139 intDI_type_node,
11140 intDI_type_node, 0);
11141 tree si_ftype_si_si = build_function_type_list (intSI_type_node,
11142 intSI_type_node,
11143 intSI_type_node, 0);
11144 tree ptr_ftype_ptr_si = build_function_type_list (ptr_type_node,
11145 ptr_type_node,
11146 intSI_type_node, 0);
11147 tree ptr_ftype_ptr_di = build_function_type_list (ptr_type_node,
11148 ptr_type_node,
11149 intDI_type_node, 0);
11150 tree si_ftype_ptr_ptr = build_function_type_list (intSI_type_node,
11151 ptr_type_node,
11152 ptr_type_node, 0);
11153 tree di_ftype_ptr_ptr = build_function_type_list (intDI_type_node,
11154 ptr_type_node,
11155 ptr_type_node, 0);
11156 tree si_ftype_v4hi_v4hi = build_function_type_list (intSI_type_node,
11157 v4hi, v4hi, 0);
11158 tree si_ftype_v2si_v2si = build_function_type_list (intSI_type_node,
11159 v2si, v2si, 0);
11160 tree di_ftype_v4hi_v4hi = build_function_type_list (intDI_type_node,
11161 v4hi, v4hi, 0);
11162 tree di_ftype_v2si_v2si = build_function_type_list (intDI_type_node,
11163 v2si, v2si, 0);
11164 tree void_ftype_di = build_function_type_list (void_type_node,
11165 intDI_type_node, 0);
11166 tree di_ftype_void = build_function_type_list (intDI_type_node,
11167 void_type_node, 0);
11168 tree void_ftype_si = build_function_type_list (void_type_node,
11169 intSI_type_node, 0);
11170 tree sf_ftype_sf_sf = build_function_type_list (float_type_node,
11171 float_type_node,
11172 float_type_node, 0);
11173 tree df_ftype_df_df = build_function_type_list (double_type_node,
11174 double_type_node,
11175 double_type_node, 0);
11177 /* Packing and expanding vectors. */
11178 def_builtin ("__builtin_vis_fpack16", CODE_FOR_fpack16_vis,
11179 SPARC_BUILTIN_FPACK16, v4qi_ftype_v4hi);
11180 def_builtin ("__builtin_vis_fpack32", CODE_FOR_fpack32_vis,
11181 SPARC_BUILTIN_FPACK32, v8qi_ftype_v2si_v8qi);
11182 def_builtin ("__builtin_vis_fpackfix", CODE_FOR_fpackfix_vis,
11183 SPARC_BUILTIN_FPACKFIX, v2hi_ftype_v2si);
11184 def_builtin_const ("__builtin_vis_fexpand", CODE_FOR_fexpand_vis,
11185 SPARC_BUILTIN_FEXPAND, v4hi_ftype_v4qi);
11186 def_builtin_const ("__builtin_vis_fpmerge", CODE_FOR_fpmerge_vis,
11187 SPARC_BUILTIN_FPMERGE, v8qi_ftype_v4qi_v4qi);
11189 /* Multiplications. */
11190 def_builtin_const ("__builtin_vis_fmul8x16", CODE_FOR_fmul8x16_vis,
11191 SPARC_BUILTIN_FMUL8X16, v4hi_ftype_v4qi_v4hi);
11192 def_builtin_const ("__builtin_vis_fmul8x16au", CODE_FOR_fmul8x16au_vis,
11193 SPARC_BUILTIN_FMUL8X16AU, v4hi_ftype_v4qi_v2hi);
11194 def_builtin_const ("__builtin_vis_fmul8x16al", CODE_FOR_fmul8x16al_vis,
11195 SPARC_BUILTIN_FMUL8X16AL, v4hi_ftype_v4qi_v2hi);
11196 def_builtin_const ("__builtin_vis_fmul8sux16", CODE_FOR_fmul8sux16_vis,
11197 SPARC_BUILTIN_FMUL8SUX16, v4hi_ftype_v8qi_v4hi);
11198 def_builtin_const ("__builtin_vis_fmul8ulx16", CODE_FOR_fmul8ulx16_vis,
11199 SPARC_BUILTIN_FMUL8ULX16, v4hi_ftype_v8qi_v4hi);
11200 def_builtin_const ("__builtin_vis_fmuld8sux16", CODE_FOR_fmuld8sux16_vis,
11201 SPARC_BUILTIN_FMULD8SUX16, v2si_ftype_v4qi_v2hi);
11202 def_builtin_const ("__builtin_vis_fmuld8ulx16", CODE_FOR_fmuld8ulx16_vis,
11203 SPARC_BUILTIN_FMULD8ULX16, v2si_ftype_v4qi_v2hi);
11205 /* Data aligning. */
11206 def_builtin ("__builtin_vis_faligndatav4hi", CODE_FOR_faligndatav4hi_vis,
11207 SPARC_BUILTIN_FALIGNDATAV4HI, v4hi_ftype_v4hi_v4hi);
11208 def_builtin ("__builtin_vis_faligndatav8qi", CODE_FOR_faligndatav8qi_vis,
11209 SPARC_BUILTIN_FALIGNDATAV8QI, v8qi_ftype_v8qi_v8qi);
11210 def_builtin ("__builtin_vis_faligndatav2si", CODE_FOR_faligndatav2si_vis,
11211 SPARC_BUILTIN_FALIGNDATAV2SI, v2si_ftype_v2si_v2si);
11212 def_builtin ("__builtin_vis_faligndatadi", CODE_FOR_faligndatav1di_vis,
11213 SPARC_BUILTIN_FALIGNDATADI, di_ftype_di_di);
11215 def_builtin ("__builtin_vis_write_gsr", CODE_FOR_wrgsr_vis,
11216 SPARC_BUILTIN_WRGSR, void_ftype_di);
11217 def_builtin ("__builtin_vis_read_gsr", CODE_FOR_rdgsr_vis,
11218 SPARC_BUILTIN_RDGSR, di_ftype_void);
11220 if (TARGET_ARCH64)
11222 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrdi_vis,
11223 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_di);
11224 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrldi_vis,
11225 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_di);
11227 else
11229 def_builtin ("__builtin_vis_alignaddr", CODE_FOR_alignaddrsi_vis,
11230 SPARC_BUILTIN_ALIGNADDR, ptr_ftype_ptr_si);
11231 def_builtin ("__builtin_vis_alignaddrl", CODE_FOR_alignaddrlsi_vis,
11232 SPARC_BUILTIN_ALIGNADDRL, ptr_ftype_ptr_si);
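/* These two implement the classic VIS idiom for misaligned loads; a
   sketch using the GCC vector extension:

	typedef unsigned char v8qi __attribute__ ((vector_size (8)));

	v8qi
	load_misaligned (const void *p)
	{
	  v8qi *q = __builtin_vis_alignaddr ((void *) p, 0);
	  return __builtin_vis_faligndatav8qi (q[0], q[1]);
	}

   alignaddr rounds P down to an 8-byte boundary and latches the
   discarded low bits in GSR.align, which faligndata then uses to
   splice the two aligned doublewords.  */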
11235 /* Pixel distance. */
11236 def_builtin_const ("__builtin_vis_pdist", CODE_FOR_pdist_vis,
11237 SPARC_BUILTIN_PDIST, di_ftype_v8qi_v8qi_di);
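/* pdist accumulates the sum of absolute differences over eight byte
   pairs, the inner loop of SAD-style video metrics; a usage sketch:

	typedef unsigned char v8qi __attribute__ ((vector_size (8)));

	long long
	sad8 (v8qi a, v8qi b, long long acc)
	{
	  return __builtin_vis_pdist (a, b, acc);
	}
   */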
11239 /* Edge handling. */
11240 if (TARGET_ARCH64)
11242 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8di_vis,
11243 SPARC_BUILTIN_EDGE8, di_ftype_ptr_ptr);
11244 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8ldi_vis,
11245 SPARC_BUILTIN_EDGE8L, di_ftype_ptr_ptr);
11246 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16di_vis,
11247 SPARC_BUILTIN_EDGE16, di_ftype_ptr_ptr);
11248 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16ldi_vis,
11249 SPARC_BUILTIN_EDGE16L, di_ftype_ptr_ptr);
11250 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32di_vis,
11251 SPARC_BUILTIN_EDGE32, di_ftype_ptr_ptr);
11252 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32ldi_vis,
11253 SPARC_BUILTIN_EDGE32L, di_ftype_ptr_ptr);
11255 else
11257 def_builtin_const ("__builtin_vis_edge8", CODE_FOR_edge8si_vis,
11258 SPARC_BUILTIN_EDGE8, si_ftype_ptr_ptr);
11259 def_builtin_const ("__builtin_vis_edge8l", CODE_FOR_edge8lsi_vis,
11260 SPARC_BUILTIN_EDGE8L, si_ftype_ptr_ptr);
11261 def_builtin_const ("__builtin_vis_edge16", CODE_FOR_edge16si_vis,
11262 SPARC_BUILTIN_EDGE16, si_ftype_ptr_ptr);
11263 def_builtin_const ("__builtin_vis_edge16l", CODE_FOR_edge16lsi_vis,
11264 SPARC_BUILTIN_EDGE16L, si_ftype_ptr_ptr);
11265 def_builtin_const ("__builtin_vis_edge32", CODE_FOR_edge32si_vis,
11266 SPARC_BUILTIN_EDGE32, si_ftype_ptr_ptr);
11267 def_builtin_const ("__builtin_vis_edge32l", CODE_FOR_edge32lsi_vis,
11268 SPARC_BUILTIN_EDGE32L, si_ftype_ptr_ptr);
11271 /* Pixel compare. */
11272 if (TARGET_ARCH64)
11274 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16di_vis,
11275 SPARC_BUILTIN_FCMPLE16, di_ftype_v4hi_v4hi);
11276 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32di_vis,
11277 SPARC_BUILTIN_FCMPLE32, di_ftype_v2si_v2si);
11278 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16di_vis,
11279 SPARC_BUILTIN_FCMPNE16, di_ftype_v4hi_v4hi);
11280 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32di_vis,
11281 SPARC_BUILTIN_FCMPNE32, di_ftype_v2si_v2si);
11282 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16di_vis,
11283 SPARC_BUILTIN_FCMPGT16, di_ftype_v4hi_v4hi);
11284 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32di_vis,
11285 SPARC_BUILTIN_FCMPGT32, di_ftype_v2si_v2si);
11286 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16di_vis,
11287 SPARC_BUILTIN_FCMPEQ16, di_ftype_v4hi_v4hi);
11288 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32di_vis,
11289 SPARC_BUILTIN_FCMPEQ32, di_ftype_v2si_v2si);
11291 else
11293 def_builtin_const ("__builtin_vis_fcmple16", CODE_FOR_fcmple16si_vis,
11294 SPARC_BUILTIN_FCMPLE16, si_ftype_v4hi_v4hi);
11295 def_builtin_const ("__builtin_vis_fcmple32", CODE_FOR_fcmple32si_vis,
11296 SPARC_BUILTIN_FCMPLE32, si_ftype_v2si_v2si);
11297 def_builtin_const ("__builtin_vis_fcmpne16", CODE_FOR_fcmpne16si_vis,
11298 SPARC_BUILTIN_FCMPNE16, si_ftype_v4hi_v4hi);
11299 def_builtin_const ("__builtin_vis_fcmpne32", CODE_FOR_fcmpne32si_vis,
11300 SPARC_BUILTIN_FCMPNE32, si_ftype_v2si_v2si);
11301 def_builtin_const ("__builtin_vis_fcmpgt16", CODE_FOR_fcmpgt16si_vis,
11302 SPARC_BUILTIN_FCMPGT16, si_ftype_v4hi_v4hi);
11303 def_builtin_const ("__builtin_vis_fcmpgt32", CODE_FOR_fcmpgt32si_vis,
11304 SPARC_BUILTIN_FCMPGT32, si_ftype_v2si_v2si);
11305 def_builtin_const ("__builtin_vis_fcmpeq16", CODE_FOR_fcmpeq16si_vis,
11306 SPARC_BUILTIN_FCMPEQ16, si_ftype_v4hi_v4hi);
11307 def_builtin_const ("__builtin_vis_fcmpeq32", CODE_FOR_fcmpeq32si_vis,
11308 SPARC_BUILTIN_FCMPEQ32, si_ftype_v2si_v2si);
11311 /* Addition and subtraction. */
11312 def_builtin_const ("__builtin_vis_fpadd16", CODE_FOR_addv4hi3,
11313 SPARC_BUILTIN_FPADD16, v4hi_ftype_v4hi_v4hi);
11314 def_builtin_const ("__builtin_vis_fpadd16s", CODE_FOR_addv2hi3,
11315 SPARC_BUILTIN_FPADD16S, v2hi_ftype_v2hi_v2hi);
11316 def_builtin_const ("__builtin_vis_fpadd32", CODE_FOR_addv2si3,
11317 SPARC_BUILTIN_FPADD32, v2si_ftype_v2si_v2si);
11318 def_builtin_const ("__builtin_vis_fpadd32s", CODE_FOR_addv1si3,
11319 SPARC_BUILTIN_FPADD32S, v1si_ftype_v1si_v1si);
11320 def_builtin_const ("__builtin_vis_fpsub16", CODE_FOR_subv4hi3,
11321 SPARC_BUILTIN_FPSUB16, v4hi_ftype_v4hi_v4hi);
11322 def_builtin_const ("__builtin_vis_fpsub16s", CODE_FOR_subv2hi3,
11323 SPARC_BUILTIN_FPSUB16S, v2hi_ftype_v2hi_v2hi);
11324 def_builtin_const ("__builtin_vis_fpsub32", CODE_FOR_subv2si3,
11325 SPARC_BUILTIN_FPSUB32, v2si_ftype_v2si_v2si);
11326 def_builtin_const ("__builtin_vis_fpsub32s", CODE_FOR_subv1si3,
11327 SPARC_BUILTIN_FPSUB32S, v1si_ftype_v1si_v1si);
11329 /* Three-dimensional array addressing. */
11330 if (TARGET_ARCH64)
11332 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8di_vis,
11333 SPARC_BUILTIN_ARRAY8, di_ftype_di_di);
11334 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16di_vis,
11335 SPARC_BUILTIN_ARRAY16, di_ftype_di_di);
11336 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32di_vis,
11337 SPARC_BUILTIN_ARRAY32, di_ftype_di_di);
11339 else
11341 def_builtin_const ("__builtin_vis_array8", CODE_FOR_array8si_vis,
11342 SPARC_BUILTIN_ARRAY8, si_ftype_si_si);
11343 def_builtin_const ("__builtin_vis_array16", CODE_FOR_array16si_vis,
11344 SPARC_BUILTIN_ARRAY16, si_ftype_si_si);
11345 def_builtin_const ("__builtin_vis_array32", CODE_FOR_array32si_vis,
11346 SPARC_BUILTIN_ARRAY32, si_ftype_si_si);
11349 if (TARGET_VIS2)
11351 /* Edge handling. */
11352 if (TARGET_ARCH64)
11354 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8ndi_vis,
11355 SPARC_BUILTIN_EDGE8N, di_ftype_ptr_ptr);
11356 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lndi_vis,
11357 SPARC_BUILTIN_EDGE8LN, di_ftype_ptr_ptr);
11358 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16ndi_vis,
11359 SPARC_BUILTIN_EDGE16N, di_ftype_ptr_ptr);
11360 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lndi_vis,
11361 SPARC_BUILTIN_EDGE16LN, di_ftype_ptr_ptr);
11362 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32ndi_vis,
11363 SPARC_BUILTIN_EDGE32N, di_ftype_ptr_ptr);
11364 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lndi_vis,
11365 SPARC_BUILTIN_EDGE32LN, di_ftype_ptr_ptr);
11367 else
11369 def_builtin_const ("__builtin_vis_edge8n", CODE_FOR_edge8nsi_vis,
11370 SPARC_BUILTIN_EDGE8N, si_ftype_ptr_ptr);
11371 def_builtin_const ("__builtin_vis_edge8ln", CODE_FOR_edge8lnsi_vis,
11372 SPARC_BUILTIN_EDGE8LN, si_ftype_ptr_ptr);
11373 def_builtin_const ("__builtin_vis_edge16n", CODE_FOR_edge16nsi_vis,
11374 SPARC_BUILTIN_EDGE16N, si_ftype_ptr_ptr);
11375 def_builtin_const ("__builtin_vis_edge16ln", CODE_FOR_edge16lnsi_vis,
11376 SPARC_BUILTIN_EDGE16LN, si_ftype_ptr_ptr);
11377 def_builtin_const ("__builtin_vis_edge32n", CODE_FOR_edge32nsi_vis,
11378 SPARC_BUILTIN_EDGE32N, si_ftype_ptr_ptr);
11379 def_builtin_const ("__builtin_vis_edge32ln", CODE_FOR_edge32lnsi_vis,
11380 SPARC_BUILTIN_EDGE32LN, si_ftype_ptr_ptr);
11383 /* Byte mask and shuffle. */
11384 if (TARGET_ARCH64)
11385 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmaskdi_vis,
11386 SPARC_BUILTIN_BMASK, di_ftype_di_di);
11387 else
11388 def_builtin ("__builtin_vis_bmask", CODE_FOR_bmasksi_vis,
11389 SPARC_BUILTIN_BMASK, si_ftype_si_si);
11390 def_builtin ("__builtin_vis_bshufflev4hi", CODE_FOR_bshufflev4hi_vis,
11391 SPARC_BUILTIN_BSHUFFLEV4HI, v4hi_ftype_v4hi_v4hi);
11392 def_builtin ("__builtin_vis_bshufflev8qi", CODE_FOR_bshufflev8qi_vis,
11393 SPARC_BUILTIN_BSHUFFLEV8QI, v8qi_ftype_v8qi_v8qi);
11394 def_builtin ("__builtin_vis_bshufflev2si", CODE_FOR_bshufflev2si_vis,
11395 SPARC_BUILTIN_BSHUFFLEV2SI, v2si_ftype_v2si_v2si);
11396 def_builtin ("__builtin_vis_bshuffledi", CODE_FOR_bshufflev1di_vis,
11397 SPARC_BUILTIN_BSHUFFLEDI, di_ftype_di_di);
11400 if (TARGET_VIS3)
11402 if (TARGET_ARCH64)
11404 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8di_vis,
11405 SPARC_BUILTIN_CMASK8, void_ftype_di);
11406 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16di_vis,
11407 SPARC_BUILTIN_CMASK16, void_ftype_di);
11408 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32di_vis,
11409 SPARC_BUILTIN_CMASK32, void_ftype_di);
11411 else
11413 def_builtin ("__builtin_vis_cmask8", CODE_FOR_cmask8si_vis,
11414 SPARC_BUILTIN_CMASK8, void_ftype_si);
11415 def_builtin ("__builtin_vis_cmask16", CODE_FOR_cmask16si_vis,
11416 SPARC_BUILTIN_CMASK16, void_ftype_si);
11417 def_builtin ("__builtin_vis_cmask32", CODE_FOR_cmask32si_vis,
11418 SPARC_BUILTIN_CMASK32, void_ftype_si);
11421 def_builtin_const ("__builtin_vis_fchksm16", CODE_FOR_fchksm16_vis,
11422 SPARC_BUILTIN_FCHKSM16, v4hi_ftype_v4hi_v4hi);
11424 def_builtin_const ("__builtin_vis_fsll16", CODE_FOR_vashlv4hi3,
11425 SPARC_BUILTIN_FSLL16, v4hi_ftype_v4hi_v4hi);
11426 def_builtin_const ("__builtin_vis_fslas16", CODE_FOR_vssashlv4hi3,
11427 SPARC_BUILTIN_FSLAS16, v4hi_ftype_v4hi_v4hi);
11428 def_builtin_const ("__builtin_vis_fsrl16", CODE_FOR_vlshrv4hi3,
11429 SPARC_BUILTIN_FSRL16, v4hi_ftype_v4hi_v4hi);
11430 def_builtin_const ("__builtin_vis_fsra16", CODE_FOR_vashrv4hi3,
11431 SPARC_BUILTIN_FSRA16, v4hi_ftype_v4hi_v4hi);
11432 def_builtin_const ("__builtin_vis_fsll32", CODE_FOR_vashlv2si3,
11433 SPARC_BUILTIN_FSLL32, v2si_ftype_v2si_v2si);
11434 def_builtin_const ("__builtin_vis_fslas32", CODE_FOR_vssashlv2si3,
11435 SPARC_BUILTIN_FSLAS32, v2si_ftype_v2si_v2si);
11436 def_builtin_const ("__builtin_vis_fsrl32", CODE_FOR_vlshrv2si3,
11437 SPARC_BUILTIN_FSRL32, v2si_ftype_v2si_v2si);
11438 def_builtin_const ("__builtin_vis_fsra32", CODE_FOR_vashrv2si3,
11439 SPARC_BUILTIN_FSRA32, v2si_ftype_v2si_v2si);
11441 if (TARGET_ARCH64)
11442 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistndi_vis,
11443 SPARC_BUILTIN_PDISTN, di_ftype_v8qi_v8qi);
11444 else
11445 def_builtin_const ("__builtin_vis_pdistn", CODE_FOR_pdistnsi_vis,
11446 SPARC_BUILTIN_PDISTN, si_ftype_v8qi_v8qi);
11448 def_builtin_const ("__builtin_vis_fmean16", CODE_FOR_fmean16_vis,
11449 SPARC_BUILTIN_FMEAN16, v4hi_ftype_v4hi_v4hi);
11450 def_builtin_const ("__builtin_vis_fpadd64", CODE_FOR_fpadd64_vis,
11451 SPARC_BUILTIN_FPADD64, di_ftype_di_di);
11452 def_builtin_const ("__builtin_vis_fpsub64", CODE_FOR_fpsub64_vis,
11453 SPARC_BUILTIN_FPSUB64, di_ftype_di_di);
11455 def_builtin_const ("__builtin_vis_fpadds16", CODE_FOR_ssaddv4hi3,
11456 SPARC_BUILTIN_FPADDS16, v4hi_ftype_v4hi_v4hi);
11457 def_builtin_const ("__builtin_vis_fpadds16s", CODE_FOR_ssaddv2hi3,
11458 SPARC_BUILTIN_FPADDS16S, v2hi_ftype_v2hi_v2hi);
11459 def_builtin_const ("__builtin_vis_fpsubs16", CODE_FOR_sssubv4hi3,
11460 SPARC_BUILTIN_FPSUBS16, v4hi_ftype_v4hi_v4hi);
11461 def_builtin_const ("__builtin_vis_fpsubs16s", CODE_FOR_sssubv2hi3,
11462 SPARC_BUILTIN_FPSUBS16S, v2hi_ftype_v2hi_v2hi);
11463 def_builtin_const ("__builtin_vis_fpadds32", CODE_FOR_ssaddv2si3,
11464 SPARC_BUILTIN_FPADDS32, v2si_ftype_v2si_v2si);
11465 def_builtin_const ("__builtin_vis_fpadds32s", CODE_FOR_ssaddv1si3,
11466 SPARC_BUILTIN_FPADDS32S, v1si_ftype_v1si_v1si);
11467 def_builtin_const ("__builtin_vis_fpsubs32", CODE_FOR_sssubv2si3,
11468 SPARC_BUILTIN_FPSUBS32, v2si_ftype_v2si_v2si);
11469 def_builtin_const ("__builtin_vis_fpsubs32s", CODE_FOR_sssubv1si3,
11470 SPARC_BUILTIN_FPSUBS32S, v1si_ftype_v1si_v1si);
11472 if (TARGET_ARCH64)
11474 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8di_vis,
11475 SPARC_BUILTIN_FUCMPLE8, di_ftype_v8qi_v8qi);
11476 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8di_vis,
11477 SPARC_BUILTIN_FUCMPNE8, di_ftype_v8qi_v8qi);
11478 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8di_vis,
11479 SPARC_BUILTIN_FUCMPGT8, di_ftype_v8qi_v8qi);
11480 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8di_vis,
11481 SPARC_BUILTIN_FUCMPEQ8, di_ftype_v8qi_v8qi);
11483 else
11485 def_builtin_const ("__builtin_vis_fucmple8", CODE_FOR_fucmple8si_vis,
11486 SPARC_BUILTIN_FUCMPLE8, si_ftype_v8qi_v8qi);
11487 def_builtin_const ("__builtin_vis_fucmpne8", CODE_FOR_fucmpne8si_vis,
11488 SPARC_BUILTIN_FUCMPNE8, si_ftype_v8qi_v8qi);
11489 def_builtin_const ("__builtin_vis_fucmpgt8", CODE_FOR_fucmpgt8si_vis,
11490 SPARC_BUILTIN_FUCMPGT8, si_ftype_v8qi_v8qi);
11491 def_builtin_const ("__builtin_vis_fucmpeq8", CODE_FOR_fucmpeq8si_vis,
11492 SPARC_BUILTIN_FUCMPEQ8, si_ftype_v8qi_v8qi);
11495 def_builtin_const ("__builtin_vis_fhadds", CODE_FOR_fhaddsf_vis,
11496 SPARC_BUILTIN_FHADDS, sf_ftype_sf_sf);
11497 def_builtin_const ("__builtin_vis_fhaddd", CODE_FOR_fhadddf_vis,
11498 SPARC_BUILTIN_FHADDD, df_ftype_df_df);
11499 def_builtin_const ("__builtin_vis_fhsubs", CODE_FOR_fhsubsf_vis,
11500 SPARC_BUILTIN_FHSUBS, sf_ftype_sf_sf);
11501 def_builtin_const ("__builtin_vis_fhsubd", CODE_FOR_fhsubdf_vis,
11502 SPARC_BUILTIN_FHSUBD, df_ftype_df_df);
11503 def_builtin_const ("__builtin_vis_fnhadds", CODE_FOR_fnhaddsf_vis,
11504 SPARC_BUILTIN_FNHADDS, sf_ftype_sf_sf);
11505 def_builtin_const ("__builtin_vis_fnhaddd", CODE_FOR_fnhadddf_vis,
11506 SPARC_BUILTIN_FNHADDD, df_ftype_df_df);
11508 def_builtin_const ("__builtin_vis_umulxhi", CODE_FOR_umulxhi_vis,
11509 SPARC_BUILTIN_UMULXHI, di_ftype_di_di);
11510 def_builtin_const ("__builtin_vis_xmulx", CODE_FOR_xmulx_vis,
11511 SPARC_BUILTIN_XMULX, di_ftype_di_di);
11512 def_builtin_const ("__builtin_vis_xmulxhi", CODE_FOR_xmulxhi_vis,
11513 SPARC_BUILTIN_XMULXHI, di_ftype_di_di);
11516 if (TARGET_VIS4)
11518 def_builtin_const ("__builtin_vis_fpadd8", CODE_FOR_addv8qi3,
11519 SPARC_BUILTIN_FPADD8, v8qi_ftype_v8qi_v8qi);
11520 def_builtin_const ("__builtin_vis_fpadds8", CODE_FOR_ssaddv8qi3,
11521 SPARC_BUILTIN_FPADDS8, v8qi_ftype_v8qi_v8qi);
11522 def_builtin_const ("__builtin_vis_fpaddus8", CODE_FOR_usaddv8qi3,
11523 SPARC_BUILTIN_FPADDUS8, v8qi_ftype_v8qi_v8qi);
11524 def_builtin_const ("__builtin_vis_fpaddus16", CODE_FOR_usaddv4hi3,
11525 SPARC_BUILTIN_FPADDUS16, v4hi_ftype_v4hi_v4hi);
11528 if (TARGET_ARCH64)
11530 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8di_vis,
11531 SPARC_BUILTIN_FPCMPLE8, di_ftype_v8qi_v8qi);
11532 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8di_vis,
11533 SPARC_BUILTIN_FPCMPGT8, di_ftype_v8qi_v8qi);
11534 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16di_vis,
11535 SPARC_BUILTIN_FPCMPULE16, di_ftype_v4hi_v4hi);
11536 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16di_vis,
11537 SPARC_BUILTIN_FPCMPUGT16, di_ftype_v4hi_v4hi);
11538 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32di_vis,
11539 SPARC_BUILTIN_FPCMPULE32, di_ftype_v2si_v2si);
11540 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32di_vis,
11541 SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
11543 else
11545 def_builtin_const ("__builtin_vis_fpcmple8", CODE_FOR_fpcmple8si_vis,
11546 SPARC_BUILTIN_FPCMPLE8, si_ftype_v8qi_v8qi);
11547 def_builtin_const ("__builtin_vis_fpcmpgt8", CODE_FOR_fpcmpgt8si_vis,
11548 SPARC_BUILTIN_FPCMPGT8, si_ftype_v8qi_v8qi);
11549 def_builtin_const ("__builtin_vis_fpcmpule16", CODE_FOR_fpcmpule16si_vis,
11550 SPARC_BUILTIN_FPCMPULE16, si_ftype_v4hi_v4hi);
11551 def_builtin_const ("__builtin_vis_fpcmpugt16", CODE_FOR_fpcmpugt16si_vis,
11552 SPARC_BUILTIN_FPCMPUGT16, si_ftype_v4hi_v4hi);
11553 def_builtin_const ("__builtin_vis_fpcmpule32", CODE_FOR_fpcmpule32si_vis,
11554 SPARC_BUILTIN_FPCMPULE32, si_ftype_v2si_v2si);
11555 def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
11556 SPARC_BUILTIN_FPCMPUGT32, si_ftype_v2si_v2si);
11559 def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
11560 SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
11561 def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
11562 SPARC_BUILTIN_FPMAX16, v4hi_ftype_v4hi_v4hi);
11563 def_builtin_const ("__builtin_vis_fpmax32", CODE_FOR_maxv2si3,
11564 SPARC_BUILTIN_FPMAX32, v2si_ftype_v2si_v2si);
11565 def_builtin_const ("__builtin_vis_fpmaxu8", CODE_FOR_maxuv8qi3,
11566 SPARC_BUILTIN_FPMAXU8, v8qi_ftype_v8qi_v8qi);
11567 def_builtin_const ("__builtin_vis_fpmaxu16", CODE_FOR_maxuv4hi3,
11568 SPARC_BUILTIN_FPMAXU16, v4hi_ftype_v4hi_v4hi);
11569 def_builtin_const ("__builtin_vis_fpmaxu32", CODE_FOR_maxuv2si3,
11570 SPARC_BUILTIN_FPMAXU32, v2si_ftype_v2si_v2si);
11571 def_builtin_const ("__builtin_vis_fpmin8", CODE_FOR_minv8qi3,
11572 SPARC_BUILTIN_FPMIN8, v8qi_ftype_v8qi_v8qi);
11573 def_builtin_const ("__builtin_vis_fpmin16", CODE_FOR_minv4hi3,
11574 SPARC_BUILTIN_FPMIN16, v4hi_ftype_v4hi_v4hi);
11575 def_builtin_const ("__builtin_vis_fpmin32", CODE_FOR_minv2si3,
11576 SPARC_BUILTIN_FPMIN32, v2si_ftype_v2si_v2si);
11577 def_builtin_const ("__builtin_vis_fpminu8", CODE_FOR_minuv8qi3,
11578 SPARC_BUILTIN_FPMINU8, v8qi_ftype_v8qi_v8qi);
11579 def_builtin_const ("__builtin_vis_fpminu16", CODE_FOR_minuv4hi3,
11580 SPARC_BUILTIN_FPMINU16, v4hi_ftype_v4hi_v4hi);
11581 def_builtin_const ("__builtin_vis_fpminu32", CODE_FOR_minuv2si3,
11582 SPARC_BUILTIN_FPMINU32, v2si_ftype_v2si_v2si);
11583 def_builtin_const ("__builtin_vis_fpsub8", CODE_FOR_subv8qi3,
11584 SPARC_BUILTIN_FPSUB8, v8qi_ftype_v8qi_v8qi);
11585 def_builtin_const ("__builtin_vis_fpsubs8", CODE_FOR_sssubv8qi3,
11586 SPARC_BUILTIN_FPSUBS8, v8qi_ftype_v8qi_v8qi);
11587 def_builtin_const ("__builtin_vis_fpsubus8", CODE_FOR_ussubv8qi3,
11588 SPARC_BUILTIN_FPSUBUS8, v8qi_ftype_v8qi_v8qi);
11589 def_builtin_const ("__builtin_vis_fpsubus16", CODE_FOR_ussubv4hi3,
11590 SPARC_BUILTIN_FPSUBUS16, v4hi_ftype_v4hi_v4hi);
11593 if (TARGET_VIS4B)
11595 def_builtin_const ("__builtin_vis_dictunpack8", CODE_FOR_dictunpack8,
11596 SPARC_BUILTIN_DICTUNPACK8, v8qi_ftype_df_si);
11597 def_builtin_const ("__builtin_vis_dictunpack16", CODE_FOR_dictunpack16,
11598 SPARC_BUILTIN_DICTUNPACK16, v4hi_ftype_df_si);
11599 def_builtin_const ("__builtin_vis_dictunpack32", CODE_FOR_dictunpack32,
11600 SPARC_BUILTIN_DICTUNPACK32, v2si_ftype_df_si);
11602 if (TARGET_ARCH64)
11604 tree di_ftype_v8qi_v8qi_si = build_function_type_list (intDI_type_node,
11605 v8qi, v8qi,
11606 intSI_type_node, 0);
11607 tree di_ftype_v4hi_v4hi_si = build_function_type_list (intDI_type_node,
11608 v4hi, v4hi,
11609 intSI_type_node, 0);
11610 tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
11611 v2si, v2si,
11612 intSI_type_node, 0);
11614 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
11615 SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
11616 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
11617 SPARC_BUILTIN_FPCMPGT8SHL, di_ftype_v8qi_v8qi_si);
11618 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8dishl,
11619 SPARC_BUILTIN_FPCMPEQ8SHL, di_ftype_v8qi_v8qi_si);
11620 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8dishl,
11621 SPARC_BUILTIN_FPCMPNE8SHL, di_ftype_v8qi_v8qi_si);
11623 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16dishl,
11624 SPARC_BUILTIN_FPCMPLE16SHL, di_ftype_v4hi_v4hi_si);
11625 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16dishl,
11626 SPARC_BUILTIN_FPCMPGT16SHL, di_ftype_v4hi_v4hi_si);
11627 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16dishl,
11628 SPARC_BUILTIN_FPCMPEQ16SHL, di_ftype_v4hi_v4hi_si);
11629 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16dishl,
11630 SPARC_BUILTIN_FPCMPNE16SHL, di_ftype_v4hi_v4hi_si);
11632 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32dishl,
11633 SPARC_BUILTIN_FPCMPLE32SHL, di_ftype_v2si_v2si_si);
11634 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32dishl,
11635 SPARC_BUILTIN_FPCMPGT32SHL, di_ftype_v2si_v2si_si);
11636 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32dishl,
11637 SPARC_BUILTIN_FPCMPEQ32SHL, di_ftype_v2si_v2si_si);
11638 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32dishl,
11639 SPARC_BUILTIN_FPCMPNE32SHL, di_ftype_v2si_v2si_si);
11642 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8dishl,
11643 SPARC_BUILTIN_FPCMPULE8SHL, di_ftype_v8qi_v8qi_si);
11644 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8dishl,
11645 SPARC_BUILTIN_FPCMPUGT8SHL, di_ftype_v8qi_v8qi_si);
11647 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16dishl,
11648 SPARC_BUILTIN_FPCMPULE16SHL, di_ftype_v4hi_v4hi_si);
11649 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16dishl,
11650 SPARC_BUILTIN_FPCMPUGT16SHL, di_ftype_v4hi_v4hi_si);
11652 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32dishl,
11653 SPARC_BUILTIN_FPCMPULE32SHL, di_ftype_v2si_v2si_si);
11654 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32dishl,
11655 SPARC_BUILTIN_FPCMPUGT32SHL, di_ftype_v2si_v2si_si);
11657 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8dishl,
11658 SPARC_BUILTIN_FPCMPDE8SHL, di_ftype_v8qi_v8qi_si);
11659 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16dishl,
11660 SPARC_BUILTIN_FPCMPDE16SHL, di_ftype_v4hi_v4hi_si);
11661 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32dishl,
11662 SPARC_BUILTIN_FPCMPDE32SHL, di_ftype_v2si_v2si_si);
11664 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8dishl,
11665 SPARC_BUILTIN_FPCMPUR8SHL, di_ftype_v8qi_v8qi_si);
11666 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16dishl,
11667 SPARC_BUILTIN_FPCMPUR16SHL, di_ftype_v4hi_v4hi_si);
11668 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32dishl,
11669 SPARC_BUILTIN_FPCMPUR32SHL, di_ftype_v2si_v2si_si);
11672 else
11674 tree si_ftype_v8qi_v8qi_si = build_function_type_list (intSI_type_node,
11675 v8qi, v8qi,
11676 intSI_type_node, 0);
11677 tree si_ftype_v4hi_v4hi_si = build_function_type_list (intSI_type_node,
11678 v4hi, v4hi,
11679 intSI_type_node, 0);
11680 tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
11681 v2si, v2si,
11682 intSI_type_node, 0);
11684 def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
11685 SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
11686 def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
11687 SPARC_BUILTIN_FPCMPGT8SHL, si_ftype_v8qi_v8qi_si);
11688 def_builtin_const ("__builtin_vis_fpcmpeq8shl", CODE_FOR_fpcmpeq8sishl,
11689 SPARC_BUILTIN_FPCMPEQ8SHL, si_ftype_v8qi_v8qi_si);
11690 def_builtin_const ("__builtin_vis_fpcmpne8shl", CODE_FOR_fpcmpne8sishl,
11691 SPARC_BUILTIN_FPCMPNE8SHL, si_ftype_v8qi_v8qi_si);
11693 def_builtin_const ("__builtin_vis_fpcmple16shl", CODE_FOR_fpcmple16sishl,
11694 SPARC_BUILTIN_FPCMPLE16SHL, si_ftype_v4hi_v4hi_si);
11695 def_builtin_const ("__builtin_vis_fpcmpgt16shl", CODE_FOR_fpcmpgt16sishl,
11696 SPARC_BUILTIN_FPCMPGT16SHL, si_ftype_v4hi_v4hi_si);
11697 def_builtin_const ("__builtin_vis_fpcmpeq16shl", CODE_FOR_fpcmpeq16sishl,
11698 SPARC_BUILTIN_FPCMPEQ16SHL, si_ftype_v4hi_v4hi_si);
11699 def_builtin_const ("__builtin_vis_fpcmpne16shl", CODE_FOR_fpcmpne16sishl,
11700 SPARC_BUILTIN_FPCMPNE16SHL, si_ftype_v4hi_v4hi_si);
11702 def_builtin_const ("__builtin_vis_fpcmple32shl", CODE_FOR_fpcmple32sishl,
11703 SPARC_BUILTIN_FPCMPLE32SHL, si_ftype_v2si_v2si_si);
11704 def_builtin_const ("__builtin_vis_fpcmpgt32shl", CODE_FOR_fpcmpgt32sishl,
11705 SPARC_BUILTIN_FPCMPGT32SHL, si_ftype_v2si_v2si_si);
11706 def_builtin_const ("__builtin_vis_fpcmpeq32shl", CODE_FOR_fpcmpeq32sishl,
11707 SPARC_BUILTIN_FPCMPEQ32SHL, si_ftype_v2si_v2si_si);
11708 def_builtin_const ("__builtin_vis_fpcmpne32shl", CODE_FOR_fpcmpne32sishl,
11709 SPARC_BUILTIN_FPCMPNE32SHL, si_ftype_v2si_v2si_si);
11712 def_builtin_const ("__builtin_vis_fpcmpule8shl", CODE_FOR_fpcmpule8sishl,
11713 SPARC_BUILTIN_FPCMPULE8SHL, si_ftype_v8qi_v8qi_si);
11714 def_builtin_const ("__builtin_vis_fpcmpugt8shl", CODE_FOR_fpcmpugt8sishl,
11715 SPARC_BUILTIN_FPCMPUGT8SHL, si_ftype_v8qi_v8qi_si);
11717 def_builtin_const ("__builtin_vis_fpcmpule16shl", CODE_FOR_fpcmpule16sishl,
11718 SPARC_BUILTIN_FPCMPULE16SHL, si_ftype_v4hi_v4hi_si);
11719 def_builtin_const ("__builtin_vis_fpcmpugt16shl", CODE_FOR_fpcmpugt16sishl,
11720 SPARC_BUILTIN_FPCMPUGT16SHL, si_ftype_v4hi_v4hi_si);
11722 def_builtin_const ("__builtin_vis_fpcmpule32shl", CODE_FOR_fpcmpule32sishl,
11723 SPARC_BUILTIN_FPCMPULE32SHL, si_ftype_v2si_v2si_si);
11724 def_builtin_const ("__builtin_vis_fpcmpugt32shl", CODE_FOR_fpcmpugt32sishl,
11725 SPARC_BUILTIN_FPCMPUGT32SHL, si_ftype_v2si_v2si_si);
11727 def_builtin_const ("__builtin_vis_fpcmpde8shl", CODE_FOR_fpcmpde8sishl,
11728 SPARC_BUILTIN_FPCMPDE8SHL, si_ftype_v8qi_v8qi_si);
11729 def_builtin_const ("__builtin_vis_fpcmpde16shl", CODE_FOR_fpcmpde16sishl,
11730 SPARC_BUILTIN_FPCMPDE16SHL, si_ftype_v4hi_v4hi_si);
11731 def_builtin_const ("__builtin_vis_fpcmpde32shl", CODE_FOR_fpcmpde32sishl,
11732 SPARC_BUILTIN_FPCMPDE32SHL, si_ftype_v2si_v2si_si);
11734 def_builtin_const ("__builtin_vis_fpcmpur8shl", CODE_FOR_fpcmpur8sishl,
11735 SPARC_BUILTIN_FPCMPUR8SHL, si_ftype_v8qi_v8qi_si);
11736 def_builtin_const ("__builtin_vis_fpcmpur16shl", CODE_FOR_fpcmpur16sishl,
11737 SPARC_BUILTIN_FPCMPUR16SHL, si_ftype_v4hi_v4hi_si);
11738 def_builtin_const ("__builtin_vis_fpcmpur32shl", CODE_FOR_fpcmpur32sishl,
11739 SPARC_BUILTIN_FPCMPUR32SHL, si_ftype_v2si_v2si_si);
11744 /* Implement TARGET_BUILTIN_DECL hook. */
11746 static tree
11747 sparc_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
11749 if (code >= SPARC_BUILTIN_MAX)
11750 return error_mark_node;
11752 return sparc_builtins[code];
11755 /* Implement TARGET_EXPAND_BUILTIN hook. */
11757 static rtx
11758 sparc_expand_builtin (tree exp, rtx target,
11759 rtx subtarget ATTRIBUTE_UNUSED,
11760 machine_mode tmode ATTRIBUTE_UNUSED,
11761 int ignore ATTRIBUTE_UNUSED)
11763 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
11764 enum sparc_builtins code
11765 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11766 enum insn_code icode = sparc_builtins_icode[code];
11767 bool nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
11768 call_expr_arg_iterator iter;
11769 int arg_count = 0;
11770 rtx pat, op[4];
11771 tree arg;
11773 if (nonvoid)
11775 machine_mode tmode = insn_data[icode].operand[0].mode;
11776 if (!target
11777 || GET_MODE (target) != tmode
11778 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
11779 op[0] = gen_reg_rtx (tmode);
11780 else
11781 op[0] = target;
11783 else
11784 op[0] = NULL_RTX;
11786 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
11788 const struct insn_operand_data *insn_op;
11789 int idx;
11791 if (arg == error_mark_node)
11792 return NULL_RTX;
11794 arg_count++;
11795 idx = arg_count - !nonvoid;
11796 insn_op = &insn_data[icode].operand[idx];
11797 op[arg_count] = expand_normal (arg);
11799 /* Some of the builtins require constant arguments. We check
11800 for this here. */
11801 if ((code >= SPARC_BUILTIN_FIRST_FPCMPSHL
11802 && code <= SPARC_BUILTIN_LAST_FPCMPSHL
11803 && arg_count == 3)
11804 || (code >= SPARC_BUILTIN_FIRST_DICTUNPACK
11805 && code <= SPARC_BUILTIN_LAST_DICTUNPACK
11806 && arg_count == 2))
11808 if (!check_constant_argument (icode, idx, op[arg_count]))
11809 return const0_rtx;
11812 if (code == SPARC_BUILTIN_LDFSR || code == SPARC_BUILTIN_STFSR)
11814 if (!address_operand (op[arg_count], SImode))
11816 op[arg_count] = convert_memory_address (Pmode, op[arg_count]);
11817 op[arg_count] = copy_addr_to_reg (op[arg_count]);
11819 op[arg_count] = gen_rtx_MEM (SImode, op[arg_count]);
11822 else if (insn_op->mode == V1DImode
11823 && GET_MODE (op[arg_count]) == DImode)
11824 op[arg_count] = gen_lowpart (V1DImode, op[arg_count]);
11826 else if (insn_op->mode == V1SImode
11827 && GET_MODE (op[arg_count]) == SImode)
11828 op[arg_count] = gen_lowpart (V1SImode, op[arg_count]);
11830 if (! (*insn_data[icode].operand[idx].predicate) (op[arg_count],
11831 insn_op->mode))
11832 op[arg_count] = copy_to_mode_reg (insn_op->mode, op[arg_count]);
11835 switch (arg_count)
11837 case 0:
11838 pat = GEN_FCN (icode) (op[0]);
11839 break;
11840 case 1:
11841 if (nonvoid)
11842 pat = GEN_FCN (icode) (op[0], op[1]);
11843 else
11844 pat = GEN_FCN (icode) (op[1]);
11845 break;
11846 case 2:
11847 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
11848 break;
11849 case 3:
11850 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
11851 break;
11852 default:
11853 gcc_unreachable ();
11856 if (!pat)
11857 return NULL_RTX;
11859 emit_insn (pat);
11861 return (nonvoid ? op[0] : const0_rtx);
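/* For instance (an illustrative walk-through, using a VIS1 builtin defined
   earlier in this file): a call such as

     __builtin_vis_fpadd16 (a, b)

   arrives here with ICODE = CODE_FOR_addv4hi3; the result register is
   op[0], the two vector arguments become op[1] and op[2], and the
   arg_count == 2 arm of the switch below emits the addv4hi3 insn. */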
11864 /* Return the upper 16 bits of the 8x16 multiplication. */
11866 static int
11867 sparc_vis_mul8x16 (int e8, int e16)
11869 return (e8 * e16 + 128) / 256;
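/* For example, sparc_vis_mul8x16 (100, 300) = (100*300 + 128) / 256
   = 30128 / 256 = 117, i.e. the 8x16 product scaled down to 16 bits
   with rounding to nearest. */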
11872 /* Multiply the VECTOR_CSTs CST0 and CST1 as specified by FNCODE and put
11873 the result into the array N_ELTS, whose elements are of INNER_TYPE. */
11875 static void
11876 sparc_handle_vis_mul8x16 (vec<tree> *n_elts, enum sparc_builtins fncode,
11877 tree inner_type, tree cst0, tree cst1)
11879 unsigned i, num = VECTOR_CST_NELTS (cst0);
11880 int scale;
11882 switch (fncode)
11884 case SPARC_BUILTIN_FMUL8X16:
11885 for (i = 0; i < num; ++i)
11887 int val
11888 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11889 TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, i)));
11890 n_elts->quick_push (build_int_cst (inner_type, val));
11892 break;
11894 case SPARC_BUILTIN_FMUL8X16AU:
11895 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 0));
11897 for (i = 0; i < num; ++i)
11899 int val
11900 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11901 scale);
11902 n_elts->quick_push (build_int_cst (inner_type, val));
11904 break;
11906 case SPARC_BUILTIN_FMUL8X16AL:
11907 scale = TREE_INT_CST_LOW (VECTOR_CST_ELT (cst1, 1));
11909 for (i = 0; i < num; ++i)
11911 int val
11912 = sparc_vis_mul8x16 (TREE_INT_CST_LOW (VECTOR_CST_ELT (cst0, i)),
11913 scale);
11914 n_elts->quick_push (build_int_cst (inner_type, val));
11916 break;
11918 default:
11919 gcc_unreachable ();
11923 /* Implement TARGET_FOLD_BUILTIN hook.
11925 Fold builtin functions for SPARC intrinsics. If IGNORE is true the
11926 result of the function call is ignored. NULL_TREE is returned if the
11927 function could not be folded. */
11929 static tree
11930 sparc_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED,
11931 tree *args, bool ignore)
11933 enum sparc_builtins code
11934 = (enum sparc_builtins) DECL_MD_FUNCTION_CODE (fndecl);
11935 tree rtype = TREE_TYPE (TREE_TYPE (fndecl));
11936 tree arg0, arg1, arg2;
11938 if (ignore)
11939 switch (code)
11941 case SPARC_BUILTIN_LDFSR:
11942 case SPARC_BUILTIN_STFSR:
11943 case SPARC_BUILTIN_ALIGNADDR:
11944 case SPARC_BUILTIN_WRGSR:
11945 case SPARC_BUILTIN_BMASK:
11946 case SPARC_BUILTIN_CMASK8:
11947 case SPARC_BUILTIN_CMASK16:
11948 case SPARC_BUILTIN_CMASK32:
11949 break;
11951 default:
11952 return build_zero_cst (rtype);
11955 switch (code)
11957 case SPARC_BUILTIN_FEXPAND:
11958 arg0 = args[0];
11959 STRIP_NOPS (arg0);
11961 if (TREE_CODE (arg0) == VECTOR_CST)
11963 tree inner_type = TREE_TYPE (rtype);
11964 unsigned i;
11966 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11967 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
11969 unsigned HOST_WIDE_INT val
11970 = TREE_INT_CST_LOW (VECTOR_CST_ELT (arg0, i));
11971 n_elts.quick_push (build_int_cst (inner_type, val << 4));
11973 return n_elts.build ();
11975 break;
11977 case SPARC_BUILTIN_FMUL8X16:
11978 case SPARC_BUILTIN_FMUL8X16AU:
11979 case SPARC_BUILTIN_FMUL8X16AL:
11980 arg0 = args[0];
11981 arg1 = args[1];
11982 STRIP_NOPS (arg0);
11983 STRIP_NOPS (arg1);
11985 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
11987 tree inner_type = TREE_TYPE (rtype);
11988 tree_vector_builder n_elts (rtype, VECTOR_CST_NELTS (arg0), 1);
11989 sparc_handle_vis_mul8x16 (&n_elts, code, inner_type, arg0, arg1);
11990 return n_elts.build ();
11992 break;
11994 case SPARC_BUILTIN_FPMERGE:
11995 arg0 = args[0];
11996 arg1 = args[1];
11997 STRIP_NOPS (arg0);
11998 STRIP_NOPS (arg1);
12000 if (TREE_CODE (arg0) == VECTOR_CST && TREE_CODE (arg1) == VECTOR_CST)
12002 tree_vector_builder n_elts (rtype, 2 * VECTOR_CST_NELTS (arg0), 1);
12003 unsigned i;
12004 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12006 n_elts.quick_push (VECTOR_CST_ELT (arg0, i));
12007 n_elts.quick_push (VECTOR_CST_ELT (arg1, i));
12010 return n_elts.build ();
12012 break;
12014 case SPARC_BUILTIN_PDIST:
12015 case SPARC_BUILTIN_PDISTN:
12016 arg0 = args[0];
12017 arg1 = args[1];
12018 STRIP_NOPS (arg0);
12019 STRIP_NOPS (arg1);
12020 if (code == SPARC_BUILTIN_PDIST)
12022 arg2 = args[2];
12023 STRIP_NOPS (arg2);
12025 else
12026 arg2 = integer_zero_node;
12028 if (TREE_CODE (arg0) == VECTOR_CST
12029 && TREE_CODE (arg1) == VECTOR_CST
12030 && TREE_CODE (arg2) == INTEGER_CST)
12032 bool overflow = false;
12033 widest_int result = wi::to_widest (arg2);
12034 widest_int tmp;
12035 unsigned i;
12037 for (i = 0; i < VECTOR_CST_NELTS (arg0); ++i)
12039 tree e0 = VECTOR_CST_ELT (arg0, i);
12040 tree e1 = VECTOR_CST_ELT (arg1, i);
12042 wi::overflow_type neg1_ovf, neg2_ovf, add1_ovf, add2_ovf;
12044 tmp = wi::neg (wi::to_widest (e1), &neg1_ovf);
12045 tmp = wi::add (wi::to_widest (e0), tmp, SIGNED, &add1_ovf);
12046 if (wi::neg_p (tmp))
12047 tmp = wi::neg (tmp, &neg2_ovf);
12048 else
12049 neg2_ovf = wi::OVF_NONE;
12050 result = wi::add (result, tmp, SIGNED, &add2_ovf);
12051 overflow |= ((neg1_ovf != wi::OVF_NONE)
12052 | (neg2_ovf != wi::OVF_NONE)
12053 | (add1_ovf != wi::OVF_NONE)
12054 | (add2_ovf != wi::OVF_NONE));
12057 gcc_assert (!overflow);
12059 return wide_int_to_tree (rtype, result);
12062 default:
12063 break;
12066 return NULL_TREE;
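/* As an example of the PDIST folding above: with constant V8QI operands
   A and B and accumulator ACC, the call folds to ACC plus the sum over
   i of |A[i] - B[i]|, computed in widest-int arithmetic and asserted
   not to overflow. */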
12069 /* ??? This duplicates information provided to the compiler by the
12070 ??? scheduler description. Some day, teach genautomata to output
12071 ??? the latencies and then CSE will just use that. */
12073 static bool
12074 sparc_rtx_costs (rtx x, machine_mode mode, int outer_code,
12075 int opno ATTRIBUTE_UNUSED,
12076 int *total, bool speed ATTRIBUTE_UNUSED)
12078 int code = GET_CODE (x);
12079 bool float_mode_p = FLOAT_MODE_P (mode);
12081 switch (code)
12083 case CONST_INT:
12084 if (SMALL_INT (x))
12085 *total = 0;
12086 else
12087 *total = 2;
12088 return true;
12090 case CONST_WIDE_INT:
12091 *total = 0;
12092 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 0)))
12093 *total += 2;
12094 if (!SPARC_SIMM13_P (CONST_WIDE_INT_ELT (x, 1)))
12095 *total += 2;
12096 return true;
12098 case HIGH:
12099 *total = 2;
12100 return true;
12102 case CONST:
12103 case LABEL_REF:
12104 case SYMBOL_REF:
12105 *total = 4;
12106 return true;
12108 case CONST_DOUBLE:
12109 *total = 8;
12110 return true;
12112 case MEM:
12113 /* If outer-code was a sign or zero extension, a cost
12114 of COSTS_N_INSNS (1) was already added in. This is
12115 why we are subtracting it back out. */
12116 if (outer_code == ZERO_EXTEND)
12118 *total = sparc_costs->int_zload - COSTS_N_INSNS (1);
12120 else if (outer_code == SIGN_EXTEND)
12122 *total = sparc_costs->int_sload - COSTS_N_INSNS (1);
12124 else if (float_mode_p)
12126 *total = sparc_costs->float_load;
12128 else
12130 *total = sparc_costs->int_load;
12133 return true;
12135 case PLUS:
12136 case MINUS:
12137 if (float_mode_p)
12138 *total = sparc_costs->float_plusminus;
12139 else
12140 *total = COSTS_N_INSNS (1);
12141 return false;
12143 case FMA:
12145 rtx sub;
12147 gcc_assert (float_mode_p);
12148 *total = sparc_costs->float_mul;
12150 sub = XEXP (x, 0);
12151 if (GET_CODE (sub) == NEG)
12152 sub = XEXP (sub, 0);
12153 *total += rtx_cost (sub, mode, FMA, 0, speed);
12155 sub = XEXP (x, 2);
12156 if (GET_CODE (sub) == NEG)
12157 sub = XEXP (sub, 0);
12158 *total += rtx_cost (sub, mode, FMA, 2, speed);
12159 return true;
12162 case MULT:
12163 if (float_mode_p)
12164 *total = sparc_costs->float_mul;
12165 else if (TARGET_ARCH32 && !TARGET_HARD_MUL)
12166 *total = COSTS_N_INSNS (25);
12167 else
12169 int bit_cost;
12171 bit_cost = 0;
12172 if (sparc_costs->int_mul_bit_factor)
12174 int nbits;
12176 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
12178 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
12179 for (nbits = 0; value != 0; value &= value - 1)
12180 nbits++;
12182 else
12183 nbits = 7;
12185 if (nbits < 3)
12186 nbits = 3;
12187 bit_cost = (nbits - 3) / sparc_costs->int_mul_bit_factor;
12188 bit_cost = COSTS_N_INSNS (bit_cost);
12191 if (mode == DImode || !TARGET_HARD_MUL)
12192 *total = sparc_costs->int_mulX + bit_cost;
12193 else
12194 *total = sparc_costs->int_mul + bit_cost;
12196 return false;
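/* Illustration of the variable multiply cost above: with
   int_mul_bit_factor == 2, a constant multiplier with ten set bits
   yields bit_cost = COSTS_N_INSNS ((10 - 3) / 2) = COSTS_N_INSNS (3)
   on top of int_mul or int_mulX. */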
12198 case ASHIFT:
12199 case ASHIFTRT:
12200 case LSHIFTRT:
12201 *total = COSTS_N_INSNS (1) + sparc_costs->shift_penalty;
12202 return false;
12204 case DIV:
12205 case UDIV:
12206 case MOD:
12207 case UMOD:
12208 if (float_mode_p)
12210 if (mode == DFmode)
12211 *total = sparc_costs->float_div_df;
12212 else
12213 *total = sparc_costs->float_div_sf;
12215 else
12217 if (mode == DImode)
12218 *total = sparc_costs->int_divX;
12219 else
12220 *total = sparc_costs->int_div;
12222 return false;
12224 case NEG:
12225 if (! float_mode_p)
12227 *total = COSTS_N_INSNS (1);
12228 return false;
12230 /* FALLTHRU */
12232 case ABS:
12233 case FLOAT:
12234 case UNSIGNED_FLOAT:
12235 case FIX:
12236 case UNSIGNED_FIX:
12237 case FLOAT_EXTEND:
12238 case FLOAT_TRUNCATE:
12239 *total = sparc_costs->float_move;
12240 return false;
12242 case SQRT:
12243 if (mode == DFmode)
12244 *total = sparc_costs->float_sqrt_df;
12245 else
12246 *total = sparc_costs->float_sqrt_sf;
12247 return false;
12249 case COMPARE:
12250 if (float_mode_p)
12251 *total = sparc_costs->float_cmp;
12252 else
12253 *total = COSTS_N_INSNS (1);
12254 return false;
12256 case IF_THEN_ELSE:
12257 if (float_mode_p)
12258 *total = sparc_costs->float_cmove;
12259 else
12260 *total = sparc_costs->int_cmove;
12261 return false;
12263 case IOR:
12264 /* Handle the NAND vector patterns. */
12265 if (sparc_vector_mode_supported_p (mode)
12266 && GET_CODE (XEXP (x, 0)) == NOT
12267 && GET_CODE (XEXP (x, 1)) == NOT)
12269 *total = COSTS_N_INSNS (1);
12270 return true;
12272 else
12273 return false;
12275 default:
12276 return false;
12280 /* Return true if CLASS is either GENERAL_REGS or I64_REGS. */
12282 static inline bool
12283 general_or_i64_p (reg_class_t rclass)
12285 return (rclass == GENERAL_REGS || rclass == I64_REGS);
12288 /* Implement TARGET_REGISTER_MOVE_COST. */
12290 static int
12291 sparc_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
12292 reg_class_t from, reg_class_t to)
12294 bool need_memory = false;
12296 /* This helps postreload CSE to eliminate redundant comparisons. */
12297 if (from == NO_REGS || to == NO_REGS)
12298 return 100;
12300 if (from == FPCC_REGS || to == FPCC_REGS)
12301 need_memory = true;
12302 else if ((FP_REG_CLASS_P (from) && general_or_i64_p (to))
12303 || (general_or_i64_p (from) && FP_REG_CLASS_P (to)))
12305 if (TARGET_VIS3)
12307 int size = GET_MODE_SIZE (mode);
12308 if (size == 8 || size == 4)
12310 if (! TARGET_ARCH32 || size == 4)
12311 return 4;
12312 else
12313 return 6;
12316 need_memory = true;
12319 if (need_memory)
12321 if (sparc_cpu == PROCESSOR_ULTRASPARC
12322 || sparc_cpu == PROCESSOR_ULTRASPARC3
12323 || sparc_cpu == PROCESSOR_NIAGARA
12324 || sparc_cpu == PROCESSOR_NIAGARA2
12325 || sparc_cpu == PROCESSOR_NIAGARA3
12326 || sparc_cpu == PROCESSOR_NIAGARA4
12327 || sparc_cpu == PROCESSOR_NIAGARA7
12328 || sparc_cpu == PROCESSOR_M8)
12329 return 12;
12331 return 6;
12334 return 2;
12337 /* Emit the sequence of insns SEQ while preserving the registers REG and REG2.
12338 This is achieved by means of a manual dynamic stack space allocation in
12339 the current frame. We make the assumption that SEQ doesn't contain any
12340 function calls, with the possible exception of calls to the GOT helper. */
12342 static void
12343 emit_and_preserve (rtx seq, rtx reg, rtx reg2)
12345 /* We must preserve the lowest 16 words for the register save area. */
12346 HOST_WIDE_INT offset = 16*UNITS_PER_WORD;
12347 /* We really need only 2 words of fresh stack space. */
12348 HOST_WIDE_INT size = SPARC_STACK_ALIGN (offset + 2*UNITS_PER_WORD);
12350 rtx slot
12351 = gen_rtx_MEM (word_mode, plus_constant (Pmode, stack_pointer_rtx,
12352 SPARC_STACK_BIAS + offset));
12354 emit_insn (gen_stack_pointer_inc (GEN_INT (-size)));
12355 emit_insn (gen_rtx_SET (slot, reg));
12356 if (reg2)
12357 emit_insn (gen_rtx_SET (adjust_address (slot, word_mode, UNITS_PER_WORD),
12358 reg2));
12359 emit_insn (seq);
12360 if (reg2)
12361 emit_insn (gen_rtx_SET (reg2,
12362 adjust_address (slot, word_mode, UNITS_PER_WORD)));
12363 emit_insn (gen_rtx_SET (reg, slot));
12364 emit_insn (gen_stack_pointer_inc (GEN_INT (size)));
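/* Schematically, for TARGET_ARCH32 (UNITS_PER_WORD == 4, zero stack
   bias), the sequence built above is:

     add %sp, -size, %sp     ! allocate aligned scratch space
     st  reg, [%sp+64]       ! just above the register save area
     st  reg2, [%sp+68]      ! only if reg2 is given
     <SEQ>
     ld  [%sp+68], reg2
     ld  [%sp+64], reg
     add %sp, size, %sp      ! release the scratch space  */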
12367 /* Output the assembler code for a thunk function. THUNK_DECL is the
12368 declaration for the thunk function itself, FUNCTION is the decl for
12369 the target function. DELTA is an immediate constant offset to be
12370 added to THIS. If VCALL_OFFSET is nonzero, the word at address
12371 (*THIS + VCALL_OFFSET) should be additionally added to THIS. */
12373 static void
12374 sparc_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
12375 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12376 tree function)
12378 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
12379 rtx this_rtx, funexp;
12380 rtx_insn *insn;
12381 unsigned int int_arg_first;
12383 reload_completed = 1;
12384 epilogue_completed = 1;
12386 emit_note (NOTE_INSN_PROLOGUE_END);
12388 if (TARGET_FLAT)
12390 sparc_leaf_function_p = 1;
12392 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12394 else if (flag_delayed_branch)
12396 /* We will emit a regular sibcall below, so we need to instruct
12397 output_sibcall that we are in a leaf function. */
12398 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 1;
12400 /* This will cause final.cc to invoke leaf_renumber_regs so we
12401 must behave as if we were in a not-yet-leafified function. */
12402 int_arg_first = SPARC_INCOMING_INT_ARG_FIRST;
12404 else
12406 /* We will emit the sibcall manually below, so we will need to
12407 manually spill non-leaf registers. */
12408 sparc_leaf_function_p = crtl->uses_only_leaf_regs = 0;
12410 /* We really are in a leaf function. */
12411 int_arg_first = SPARC_OUTGOING_INT_ARG_FIRST;
12414 /* Find the "this" pointer. Normally in %o0, but in ARCH64 if the function
12415 returns a structure, the structure return pointer is there instead. */
12416 if (TARGET_ARCH64
12417 && aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12418 this_rtx = gen_rtx_REG (Pmode, int_arg_first + 1);
12419 else
12420 this_rtx = gen_rtx_REG (Pmode, int_arg_first);
12422 /* Add DELTA. When possible use a plain add, otherwise load it into
12423 a register first. */
12424 if (delta)
12426 rtx delta_rtx = GEN_INT (delta);
12428 if (! SPARC_SIMM13_P (delta))
12430 rtx scratch = gen_rtx_REG (Pmode, 1);
12431 emit_move_insn (scratch, delta_rtx);
12432 delta_rtx = scratch;
12435 /* THIS_RTX += DELTA. */
12436 emit_insn (gen_add2_insn (this_rtx, delta_rtx));
12439 /* Add the word at address (*THIS_RTX + VCALL_OFFSET). */
12440 if (vcall_offset)
12442 rtx vcall_offset_rtx = GEN_INT (vcall_offset);
12443 rtx scratch = gen_rtx_REG (Pmode, 1);
12445 gcc_assert (vcall_offset < 0);
12447 /* SCRATCH = *THIS_RTX. */
12448 emit_move_insn (scratch, gen_rtx_MEM (Pmode, this_rtx));
12450 /* Prepare for adding VCALL_OFFSET. The difficulty is that we
12451 may not have any available scratch register at this point. */
12452 if (SPARC_SIMM13_P (vcall_offset))
12453 ;
12454 /* This is the case if ARCH64 (unless -ffixed-g5 is passed). */
12455 else if (! fixed_regs[5]
12456 /* The below sequence is made up of at least 2 insns,
12457 while the default method may need only one. */
12458 && vcall_offset < -8192)
12460 rtx scratch2 = gen_rtx_REG (Pmode, 5);
12461 emit_move_insn (scratch2, vcall_offset_rtx);
12462 vcall_offset_rtx = scratch2;
12464 else
12466 rtx increment = GEN_INT (-4096);
12468 /* VCALL_OFFSET is a negative number whose typical range can be
12469 estimated as -32768..0 in 32-bit mode. In almost all cases
12470 it is therefore cheaper to emit multiple add insns than
12471 spilling and loading the constant into a register (at least
12472 6 insns). */
12473 while (! SPARC_SIMM13_P (vcall_offset))
12475 emit_insn (gen_add2_insn (scratch, increment));
12476 vcall_offset += 4096;
12478 vcall_offset_rtx = GEN_INT (vcall_offset); /* cannot be 0 */
12481 /* SCRATCH = *(*THIS_RTX + VCALL_OFFSET). */
12482 emit_move_insn (scratch, gen_rtx_MEM (Pmode,
12483 gen_rtx_PLUS (Pmode,
12484 scratch,
12485 vcall_offset_rtx)));
12487 /* THIS_RTX += *(*THIS_RTX + VCALL_OFFSET). */
12488 emit_insn (gen_add2_insn (this_rtx, scratch));
12491 /* Generate a tail call to the target function. */
12492 if (! TREE_USED (function))
12494 assemble_external (function);
12495 TREE_USED (function) = 1;
12497 funexp = XEXP (DECL_RTL (function), 0);
12499 if (flag_delayed_branch)
12501 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
12502 insn = emit_call_insn (gen_sibcall (funexp));
12503 SIBLING_CALL_P (insn) = 1;
12505 else
12507 /* The hoops we have to jump through in order to generate a sibcall
12508 without using delay slots... */
12509 rtx spill_reg, seq, scratch = gen_rtx_REG (Pmode, 1);
12511 if (flag_pic)
12513 spill_reg = gen_rtx_REG (word_mode, 15); /* %o7 */
12514 start_sequence ();
12515 load_got_register (); /* clobbers %o7 */
12516 if (!TARGET_VXWORKS_RTP)
12517 pic_offset_table_rtx = got_register_rtx;
12518 scratch = sparc_legitimize_pic_address (funexp, scratch);
12519 seq = get_insns ();
12520 end_sequence ();
12521 emit_and_preserve (seq, spill_reg, pic_offset_table_rtx);
12523 else if (TARGET_ARCH32)
12525 emit_insn (gen_rtx_SET (scratch,
12526 gen_rtx_HIGH (SImode, funexp)));
12527 emit_insn (gen_rtx_SET (scratch,
12528 gen_rtx_LO_SUM (SImode, scratch, funexp)));
12530 else /* TARGET_ARCH64 */
12532 switch (sparc_code_model)
12534 case CM_MEDLOW:
12535 case CM_MEDMID:
12536 /* The destination can serve as a temporary. */
12537 sparc_emit_set_symbolic_const64 (scratch, funexp, scratch);
12538 break;
12540 case CM_MEDANY:
12541 case CM_EMBMEDANY:
12542 /* The destination cannot serve as a temporary. */
12543 spill_reg = gen_rtx_REG (DImode, 15); /* %o7 */
12544 start_sequence ();
12545 sparc_emit_set_symbolic_const64 (scratch, funexp, spill_reg);
12546 seq = get_insns ();
12547 end_sequence ();
12548 emit_and_preserve (seq, spill_reg, 0);
12549 break;
12551 default:
12552 gcc_unreachable ();
12556 emit_jump_insn (gen_indirect_jump (scratch));
12559 emit_barrier ();
12561 /* Run just enough of rest_of_compilation to get the insns emitted.
12562 There's not really enough bulk here to make other passes such as
12563 instruction scheduling worth while. */
12564 insn = get_insns ();
12565 shorten_branches (insn);
12566 assemble_start_function (thunk_fndecl, fnname);
12567 final_start_function (insn, file, 1);
12568 final (insn, file, 1);
12569 final_end_function ();
12570 assemble_end_function (thunk_fndecl, fnname);
12572 reload_completed = 0;
12573 epilogue_completed = 0;
12576 /* Return true if sparc_output_mi_thunk would be able to output the
12577 assembler code for the thunk function specified by the arguments
12578 it is passed, and false otherwise. */
12579 static bool
12580 sparc_can_output_mi_thunk (const_tree thunk_fndecl ATTRIBUTE_UNUSED,
12581 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
12582 HOST_WIDE_INT vcall_offset,
12583 const_tree function ATTRIBUTE_UNUSED)
12585 /* Bound the loop used in the default method above. */
12586 return (vcall_offset >= -32768 || ! fixed_regs[5]);
12589 /* How to allocate a 'struct machine_function'. */
12591 static struct machine_function *
12592 sparc_init_machine_status (void)
12594 return ggc_cleared_alloc<machine_function> ();
12597 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
12599 static unsigned HOST_WIDE_INT
12600 sparc_asan_shadow_offset (void)
12602 return TARGET_ARCH64 ? (HOST_WIDE_INT_1 << 43) : (HOST_WIDE_INT_1 << 29);
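/* Illustrative mapping, assuming the default ASan shadow scale of 3:
   shadow = (addr >> 3) + (1 << 43) on 64-bit targets and
   shadow = (addr >> 3) + (1 << 29) on 32-bit targets. */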
12605 /* This is called from dwarf2out.cc via TARGET_ASM_OUTPUT_DWARF_DTPREL.
12606 We need to emit DTP-relative relocations. */
12608 static void
12609 sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
12611 switch (size)
12613 case 4:
12614 fputs ("\t.word\t%r_tls_dtpoff32(", file);
12615 break;
12616 case 8:
12617 fputs ("\t.xword\t%r_tls_dtpoff64(", file);
12618 break;
12619 default:
12620 gcc_unreachable ();
12622 output_addr_const (file, x);
12623 fputs (")", file);
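/* For a symbol X this emits, e.g., ".word %r_tls_dtpoff32(X)" when
   SIZE is 4 and ".xword %r_tls_dtpoff64(X)" when SIZE is 8. */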
12626 /* Implement TARGET_OUTPUT_CFI_DIRECTIVE. */
12627 static bool
12628 sparc_output_cfi_directive (FILE *f, dw_cfi_ref cfi)
12630 if (cfi->dw_cfi_opc == DW_CFA_GNU_window_save)
12632 fprintf (f, "\t.cfi_window_save\n");
12633 return true;
12635 return false;
12638 /* Implement TARGET_DW_CFI_OPRND1_DESC. */
12639 static bool
12640 sparc_dw_cfi_oprnd1_desc (dwarf_call_frame_info cfi_opc,
12641 dw_cfi_oprnd_type &oprnd_type)
12643 if (cfi_opc == DW_CFA_GNU_window_save)
12645 oprnd_type = dw_cfi_oprnd_unused;
12646 return true;
12648 return false;
12651 /* Do whatever processing is required at the end of a file. */
12653 static void
12654 sparc_file_end (void)
12656 /* If we need to emit the special GOT helper function, do so now. */
12657 if (got_helper_needed)
12659 const char *name = XSTR (got_helper_rtx, 0);
12660 #ifdef DWARF2_UNWIND_INFO
12661 bool do_cfi;
12662 #endif
12664 if (USE_HIDDEN_LINKONCE)
12666 tree decl = build_decl (BUILTINS_LOCATION, FUNCTION_DECL,
12667 get_identifier (name),
12668 build_function_type_list (void_type_node,
12669 NULL_TREE));
12670 DECL_RESULT (decl) = build_decl (BUILTINS_LOCATION, RESULT_DECL,
12671 NULL_TREE, void_type_node);
12672 TREE_PUBLIC (decl) = 1;
12673 TREE_STATIC (decl) = 1;
12674 make_decl_one_only (decl, DECL_ASSEMBLER_NAME (decl));
12675 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
12676 DECL_VISIBILITY_SPECIFIED (decl) = 1;
12677 resolve_unique_section (decl, 0, flag_function_sections);
12678 allocate_struct_function (decl, true);
12679 cfun->is_thunk = 1;
12680 current_function_decl = decl;
12681 init_varasm_status ();
12682 assemble_start_function (decl, name);
12684 else
12686 const int align = floor_log2 (FUNCTION_BOUNDARY / BITS_PER_UNIT);
12687 switch_to_section (text_section);
12688 if (align > 0)
12689 ASM_OUTPUT_ALIGN (asm_out_file, align);
12690 ASM_OUTPUT_LABEL (asm_out_file, name);
12693 #ifdef DWARF2_UNWIND_INFO
12694 do_cfi = dwarf2out_do_cfi_asm ();
12695 if (do_cfi)
12696 output_asm_insn (".cfi_startproc", NULL);
12697 #endif
12698 if (flag_delayed_branch)
12700 output_asm_insn ("jmp\t%%o7+8", NULL);
12701 output_asm_insn (" add\t%%o7, %0, %0", &got_register_rtx);
12703 else
12705 output_asm_insn ("add\t%%o7, %0, %0", &got_register_rtx);
12706 output_asm_insn ("jmp\t%%o7+8", NULL);
12707 output_asm_insn (" nop", NULL);
12709 #ifdef DWARF2_UNWIND_INFO
12710 if (do_cfi)
12711 output_asm_insn (".cfi_endproc", NULL);
12712 #endif
12715 if (NEED_INDICATE_EXEC_STACK)
12716 file_end_indicate_exec_stack ();
12718 #ifdef TARGET_SOLARIS
12719 solaris_file_end ();
12720 #endif
12723 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
12724 /* Implement TARGET_MANGLE_TYPE. */
12726 static const char *
12727 sparc_mangle_type (const_tree type)
12729 if (TARGET_ARCH32
12730 && TYPE_MAIN_VARIANT (type) == long_double_type_node
12731 && TARGET_LONG_DOUBLE_128)
12732 return "g";
12734 /* For all other types, use normal C++ mangling. */
12735 return NULL;
12737 #endif
12739 /* Expand a membar instruction for various use cases. Both the LOAD_STORE
12740 and BEFORE_AFTER arguments are of the form X_Y. They are two-bit masks where
12741 bit 0 indicates that X is true, and bit 1 indicates Y is true. */
12743 void
12744 sparc_emit_membar_for_model (enum memmodel model,
12745 int load_store, int before_after)
12747 /* Bits for the MEMBAR mmask field. */
12748 const int LoadLoad = 1;
12749 const int StoreLoad = 2;
12750 const int LoadStore = 4;
12751 const int StoreStore = 8;
12753 int mm = 0, implied = 0;
12755 switch (sparc_memory_model)
12757 case SMM_SC:
12758 /* Sequential Consistency. All memory transactions are immediately
12759 visible in sequential execution order. No barriers needed. */
12760 implied = LoadLoad | StoreLoad | LoadStore | StoreStore;
12761 break;
12763 case SMM_TSO:
12764 /* Total Store Ordering: all memory transactions with store semantics
12765 are followed by an implied StoreStore. */
12766 implied |= StoreStore;
12768 /* If we're not looking for a raw barrier (before+after), then atomic
12769 operations get the benefit of being both load and store. */
12770 if (load_store == 3 && before_after == 1)
12771 implied |= StoreLoad;
12772 /* FALLTHRU */
12774 case SMM_PSO:
12775 /* Partial Store Ordering: all memory transactions with load semantics
12776 are followed by an implied LoadLoad | LoadStore. */
12777 implied |= LoadLoad | LoadStore;
12779 /* If we're not looking for a raw barrier (before+after), then atomic
12780 operations get the benefit of being both load and store. */
12781 if (load_store == 3 && before_after == 2)
12782 implied |= StoreLoad | StoreStore;
12783 /* FALLTHRU */
12785 case SMM_RMO:
12786 /* Relaxed Memory Ordering: no implicit bits. */
12787 break;
12789 default:
12790 gcc_unreachable ();
12793 if (before_after & 1)
12795 if (is_mm_release (model) || is_mm_acq_rel (model)
12796 || is_mm_seq_cst (model))
12798 if (load_store & 1)
12799 mm |= LoadLoad | StoreLoad;
12800 if (load_store & 2)
12801 mm |= LoadStore | StoreStore;
12804 if (before_after & 2)
12806 if (is_mm_acquire (model) || is_mm_acq_rel (model)
12807 || is_mm_seq_cst (model))
12809 if (load_store & 1)
12810 mm |= LoadLoad | LoadStore;
12811 if (load_store & 2)
12812 mm |= StoreLoad | StoreStore;
12816 /* Remove the bits implied by the system memory model. */
12817 mm &= ~implied;
12819 /* For raw barriers (before+after), always emit a barrier.
12820 This will become a compile-time barrier if needed. */
12821 if (mm || before_after == 3)
12822 emit_insn (gen_membar (GEN_INT (mm)));
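/* Worked example: a request for barriers after a seq-cst load
   (LOAD_STORE == 1, BEFORE_AFTER == 2) computes
   mm = LoadLoad | LoadStore. Under SMM_TSO both bits are implied by
   the memory model, so nothing is emitted; under SMM_RMO this emits
   "membar #LoadLoad | #LoadStore". */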
12825 /* Expand code to perform a 8 or 16-bit compare and swap by doing 32-bit
12826 compare and swap on the word containing the byte or half-word. */
12828 static void
12829 sparc_expand_compare_and_swap_12 (rtx bool_result, rtx result, rtx mem,
12830 rtx oldval, rtx newval)
12832 rtx addr1 = force_reg (Pmode, XEXP (mem, 0));
12833 rtx addr = gen_reg_rtx (Pmode);
12834 rtx off = gen_reg_rtx (SImode);
12835 rtx oldv = gen_reg_rtx (SImode);
12836 rtx newv = gen_reg_rtx (SImode);
12837 rtx oldvalue = gen_reg_rtx (SImode);
12838 rtx newvalue = gen_reg_rtx (SImode);
12839 rtx res = gen_reg_rtx (SImode);
12840 rtx resv = gen_reg_rtx (SImode);
12841 rtx memsi, val, mask, cc;
12843 emit_insn (gen_rtx_SET (addr, gen_rtx_AND (Pmode, addr1, GEN_INT (-4))));
12845 if (Pmode != SImode)
12846 addr1 = gen_lowpart (SImode, addr1);
12847 emit_insn (gen_rtx_SET (off, gen_rtx_AND (SImode, addr1, GEN_INT (3))));
12849 memsi = gen_rtx_MEM (SImode, addr);
12850 set_mem_alias_set (memsi, ALIAS_SET_MEMORY_BARRIER);
12851 MEM_VOLATILE_P (memsi) = MEM_VOLATILE_P (mem);
12853 val = copy_to_reg (memsi);
12855 emit_insn (gen_rtx_SET (off,
12856 gen_rtx_XOR (SImode, off,
12857 GEN_INT (GET_MODE (mem) == QImode
12858 ? 3 : 2))));
12860 emit_insn (gen_rtx_SET (off, gen_rtx_ASHIFT (SImode, off, GEN_INT (3))));
12862 if (GET_MODE (mem) == QImode)
12863 mask = force_reg (SImode, GEN_INT (0xff));
12864 else
12865 mask = force_reg (SImode, GEN_INT (0xffff));
12867 emit_insn (gen_rtx_SET (mask, gen_rtx_ASHIFT (SImode, mask, off)));
12869 emit_insn (gen_rtx_SET (val,
12870 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12871 val)));
12873 oldval = gen_lowpart (SImode, oldval);
12874 emit_insn (gen_rtx_SET (oldv, gen_rtx_ASHIFT (SImode, oldval, off)));
12876 newval = gen_lowpart_common (SImode, newval);
12877 emit_insn (gen_rtx_SET (newv, gen_rtx_ASHIFT (SImode, newval, off)));
12879 emit_insn (gen_rtx_SET (oldv, gen_rtx_AND (SImode, oldv, mask)));
12881 emit_insn (gen_rtx_SET (newv, gen_rtx_AND (SImode, newv, mask)));
12883 rtx_code_label *end_label = gen_label_rtx ();
12884 rtx_code_label *loop_label = gen_label_rtx ();
12885 emit_label (loop_label);
12887 emit_insn (gen_rtx_SET (oldvalue, gen_rtx_IOR (SImode, oldv, val)));
12889 emit_insn (gen_rtx_SET (newvalue, gen_rtx_IOR (SImode, newv, val)));
12891 emit_move_insn (bool_result, const1_rtx);
12893 emit_insn (gen_atomic_compare_and_swapsi_1 (res, memsi, oldvalue, newvalue));
12895 emit_cmp_and_jump_insns (res, oldvalue, EQ, NULL, SImode, 0, end_label);
12897 emit_insn (gen_rtx_SET (resv,
12898 gen_rtx_AND (SImode, gen_rtx_NOT (SImode, mask),
12899 res)));
12901 emit_move_insn (bool_result, const0_rtx);
12903 cc = gen_compare_reg_1 (NE, resv, val);
12904 emit_insn (gen_rtx_SET (val, resv));
12906 /* Use cbranchcc4 to separate the compare and branch! */
12907 emit_jump_insn (gen_cbranchcc4 (gen_rtx_NE (VOIDmode, cc, const0_rtx),
12908 cc, const0_rtx, loop_label));
12910 emit_label (end_label);
12912 emit_insn (gen_rtx_SET (res, gen_rtx_AND (SImode, res, mask)));
12914 emit_insn (gen_rtx_SET (res, gen_rtx_LSHIFTRT (SImode, res, off)));
12916 emit_move_insn (result, gen_lowpart (GET_MODE (result), res));
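/* To illustrate: for a QImode CAS at address A, the code above rounds
   A down to the containing word (A & -4), computes the byte's bit
   offset as ((A & 3) ^ 3) * 8 (SPARC is big-endian, so the byte at
   the lowest address is the most significant), and then loops on the
   word-sized CAS until the bytes outside the mask are stable. */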
12919 /* Expand code to perform a compare-and-swap. */
12921 void
12922 sparc_expand_compare_and_swap (rtx operands[])
12924 rtx bval, retval, mem, oldval, newval;
12925 machine_mode mode;
12926 enum memmodel model;
12928 bval = operands[0];
12929 retval = operands[1];
12930 mem = operands[2];
12931 oldval = operands[3];
12932 newval = operands[4];
12933 model = (enum memmodel) INTVAL (operands[6]);
12934 mode = GET_MODE (mem);
12936 sparc_emit_membar_for_model (model, 3, 1);
12938 if (reg_overlap_mentioned_p (retval, oldval))
12939 oldval = copy_to_reg (oldval);
12941 if (mode == QImode || mode == HImode)
12942 sparc_expand_compare_and_swap_12 (bval, retval, mem, oldval, newval);
12943 else
12945 rtx (*gen) (rtx, rtx, rtx, rtx);
12946 rtx x;
12948 if (mode == SImode)
12949 gen = gen_atomic_compare_and_swapsi_1;
12950 else
12951 gen = gen_atomic_compare_and_swapdi_1;
12952 emit_insn (gen (retval, mem, oldval, newval));
12954 x = emit_store_flag (bval, EQ, retval, oldval, mode, 1, 1);
12955 if (x != bval)
12956 convert_move (bval, x, 1);
12959 sparc_emit_membar_for_model (model, 3, 2);
12962 void
12963 sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
12965 rtx t_1, t_2, t_3;
12967 sel = gen_lowpart (DImode, sel);
12968 switch (vmode)
12970 case E_V2SImode:
12971 /* inp = xxxxxxxAxxxxxxxB */
12972 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12973 NULL_RTX, 1, OPTAB_DIRECT);
12974 /* t_1 = ....xxxxxxxAxxx. */
12975 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
12976 GEN_INT (3), NULL_RTX, 1, OPTAB_DIRECT);
12977 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
12978 GEN_INT (0x30000), NULL_RTX, 1, OPTAB_DIRECT);
12979 /* sel = .......B */
12980 /* t_1 = ...A.... */
12981 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
12982 /* sel = ...A...B */
12983 sel = expand_mult (SImode, sel, GEN_INT (0x4444), sel, 1);
12984 /* sel = AAAABBBB * 4 */
12985 t_1 = force_reg (SImode, GEN_INT (0x01230123));
12986 /* sel = { A*4, A*4+1, A*4+2, ... } */
12987 break;
12989 case E_V4HImode:
12990 /* inp = xxxAxxxBxxxCxxxD */
12991 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
12992 NULL_RTX, 1, OPTAB_DIRECT);
12993 t_2 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
12994 NULL_RTX, 1, OPTAB_DIRECT);
12995 t_3 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (24),
12996 NULL_RTX, 1, OPTAB_DIRECT);
12997 /* t_1 = ..xxxAxxxBxxxCxx */
12998 /* t_2 = ....xxxAxxxBxxxC */
12999 /* t_3 = ......xxxAxxxBxx */
13000 sel = expand_simple_binop (SImode, AND, gen_lowpart (SImode, sel),
13001 GEN_INT (0x07),
13002 NULL_RTX, 1, OPTAB_DIRECT);
13003 t_1 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_1),
13004 GEN_INT (0x0700),
13005 NULL_RTX, 1, OPTAB_DIRECT);
13006 t_2 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_2),
13007 GEN_INT (0x070000),
13008 NULL_RTX, 1, OPTAB_DIRECT);
13009 t_3 = expand_simple_binop (SImode, AND, gen_lowpart (SImode, t_3),
13010 GEN_INT (0x07000000),
13011 NULL_RTX, 1, OPTAB_DIRECT);
13012 /* sel = .......D */
13013 /* t_1 = .....C.. */
13014 /* t_2 = ...B.... */
13015 /* t_3 = .A...... */
13016 sel = expand_simple_binop (SImode, IOR, sel, t_1, sel, 1, OPTAB_DIRECT);
13017 t_2 = expand_simple_binop (SImode, IOR, t_2, t_3, t_2, 1, OPTAB_DIRECT);
13018 sel = expand_simple_binop (SImode, IOR, sel, t_2, sel, 1, OPTAB_DIRECT);
13019 /* sel = .A.B.C.D */
13020 sel = expand_mult (SImode, sel, GEN_INT (0x22), sel, 1);
13021 /* sel = AABBCCDD * 2 */
13022 t_1 = force_reg (SImode, GEN_INT (0x01010101));
13023 /* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
13024 break;
13026 case E_V8QImode:
13027 /* input = xAxBxCxDxExFxGxH */
13028 sel = expand_simple_binop (DImode, AND, sel,
13029 GEN_INT ((HOST_WIDE_INT)0x0f0f0f0f << 32
13030 | 0x0f0f0f0f),
13031 NULL_RTX, 1, OPTAB_DIRECT);
13032 /* sel = .A.B.C.D.E.F.G.H */
13033 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (4),
13034 NULL_RTX, 1, OPTAB_DIRECT);
13035 /* t_1 = ..A.B.C.D.E.F.G. */
13036 sel = expand_simple_binop (DImode, IOR, sel, t_1,
13037 NULL_RTX, 1, OPTAB_DIRECT);
13038 /* sel = .AABBCCDDEEFFGGH */
13039 sel = expand_simple_binop (DImode, AND, sel,
13040 GEN_INT ((HOST_WIDE_INT)0xff00ff << 32
13041 | 0xff00ff),
13042 NULL_RTX, 1, OPTAB_DIRECT);
13043 /* sel = ..AB..CD..EF..GH */
13044 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (8),
13045 NULL_RTX, 1, OPTAB_DIRECT);
13046 /* t_1 = ....AB..CD..EF.. */
13047 sel = expand_simple_binop (DImode, IOR, sel, t_1,
13048 NULL_RTX, 1, OPTAB_DIRECT);
13049 /* sel = ..ABABCDCDEFEFGH */
13050 sel = expand_simple_binop (DImode, AND, sel,
13051 GEN_INT ((HOST_WIDE_INT)0xffff << 32 | 0xffff),
13052 NULL_RTX, 1, OPTAB_DIRECT);
13053 /* sel = ....ABCD....EFGH */
13054 t_1 = expand_simple_binop (DImode, LSHIFTRT, sel, GEN_INT (16),
13055 NULL_RTX, 1, OPTAB_DIRECT);
13056 /* t_1 = ........ABCD.... */
13057 sel = gen_lowpart (SImode, sel);
13058 t_1 = gen_lowpart (SImode, t_1);
13059 break;
13061 default:
13062 gcc_unreachable ();
13065 /* Always perform the final addition/merge within the bmask insn. */
13066 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
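/* For instance, in the V4HImode case a halfword selector { A, B, C, D }
   ends up as the nibble mask { 2A, 2A+1, 2B, 2B+1, 2C, 2C+1, 2D, 2D+1 }:
   the multiply by 0x22 doubles each index and replicates it into
   adjacent nibbles, and the addition of 0x01010101 performed inside
   the bmask insn supplies the "+1" for the odd byte of each halfword. */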
13069 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
13071 static bool
13072 sparc_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
13073 rtx target, rtx op0, rtx op1,
13074 const vec_perm_indices &sel)
13076 if (vmode != op_mode)
13077 return false;
13079 if (!TARGET_VIS2)
13080 return false;
13082 /* All 8-byte permutes are supported. */
13083 if (!target)
13084 return GET_MODE_SIZE (vmode) == 8;
13086 /* Force target-independent code to convert constant permutations on other
13087 modes down to V8QI. Rely on this to avoid the complexity of the byte
13088 order of the permutation. */
13089 if (vmode != V8QImode)
13090 return false;
13092 rtx nop0 = force_reg (vmode, op0);
13093 if (op0 == op1)
13094 op1 = nop0;
13095 op0 = nop0;
13096 op1 = force_reg (vmode, op1);
13098 unsigned int i, mask;
13099 for (i = mask = 0; i < 8; ++i)
13100 mask |= (sel[i] & 0xf) << (28 - i*4);
13101 rtx mask_rtx = force_reg (SImode, gen_int_mode (mask, SImode));
13103 emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), mask_rtx, const0_rtx));
13104 emit_insn (gen_bshufflev8qi_vis (target, op0, op1));
13105 return true;
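/* For example, the identity V8QI permutation sel = { 0, 1, ..., 7 }
   packs into mask 0x01234567, and the reversal of a single operand,
   sel = { 7, 6, ..., 0 }, packs into 0x76543210. */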
13108 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
13110 static bool
13111 sparc_frame_pointer_required (void)
13113 /* If the stack pointer is dynamically modified in the function, it cannot
13114 serve as the frame pointer. */
13115 if (cfun->calls_alloca)
13116 return true;
13118 /* If the function receives nonlocal gotos, it needs to save the frame
13119 pointer in the nonlocal_goto_save_area object. */
13120 if (cfun->has_nonlocal_label)
13121 return true;
13123 /* In flat mode, that's it. */
13124 if (TARGET_FLAT)
13125 return false;
13127 /* Otherwise, the frame pointer is required if the function isn't leaf, but
13128 we cannot use sparc_leaf_function_p since it hasn't been computed yet. */
13129 return !(optimize > 0 && crtl->is_leaf && only_leaf_regs_used ());
13132 /* The way this is structured, we can't eliminate SFP in favor of SP
13133 if the frame pointer is required: we want to use the SFP->HFP elimination
13134 in that case. But the test in update_eliminables doesn't know we are
13135 assuming below that we only do the former elimination. */
13137 static bool
13138 sparc_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
13140 return to == HARD_FRAME_POINTER_REGNUM || !sparc_frame_pointer_required ();
13143 /* If !TARGET_FPU, then make the fp registers and fp cc regs fixed so that
13144 they won't be allocated. */
13146 static void
13147 sparc_conditional_register_usage (void)
13149 if (PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
13150 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13151 /* If the user has passed -f{fixed,call-{used,saved}}-g5,
13152 then honor it. */
13153 if (TARGET_ARCH32 && fixed_regs[5])
13154 fixed_regs[5] = 1;
13155 else if (TARGET_ARCH64 && fixed_regs[5] == 2)
13156 fixed_regs[5] = 0;
13157 if (! TARGET_V9)
13159 int regno;
13160 for (regno = SPARC_FIRST_V9_FP_REG;
13161 regno <= SPARC_LAST_V9_FP_REG;
13162 regno++)
13163 fixed_regs[regno] = 1;
13164 /* %fcc0 is used by v8 and v9. */
13165 for (regno = SPARC_FIRST_V9_FCC_REG + 1;
13166 regno <= SPARC_LAST_V9_FCC_REG;
13167 regno++)
13168 fixed_regs[regno] = 1;
13170 if (! TARGET_FPU)
13172 int regno;
13173 for (regno = 32; regno < SPARC_LAST_V9_FCC_REG; regno++)
13174 fixed_regs[regno] = 1;
13176 /* If the user has passed -f{fixed,call-{used,saved}}-g2,
13177 then honor it. Likewise with g3 and g4. */
13178 if (fixed_regs[2] == 2)
13179 fixed_regs[2] = ! TARGET_APP_REGS;
13180 if (fixed_regs[3] == 2)
13181 fixed_regs[3] = ! TARGET_APP_REGS;
13182 if (TARGET_ARCH32 && fixed_regs[4] == 2)
13183 fixed_regs[4] = ! TARGET_APP_REGS;
13184 else if (TARGET_CM_EMBMEDANY)
13185 fixed_regs[4] = 1;
13186 else if (fixed_regs[4] == 2)
13187 fixed_regs[4] = 0;
13189 /* Disable leaf function optimization in flat mode. */
13190 if (TARGET_FLAT)
13191 memset (sparc_leaf_regs, 0, FIRST_PSEUDO_REGISTER);
13193 if (TARGET_VIS)
13194 global_regs[SPARC_GSR_REG] = 1;
13197 /* Implement TARGET_USE_PSEUDO_PIC_REG. */
13199 static bool
13200 sparc_use_pseudo_pic_reg (void)
13202 return !TARGET_VXWORKS_RTP && flag_pic;
13205 /* Implement TARGET_INIT_PIC_REG. */
13207 static void
13208 sparc_init_pic_reg (void)
13210 edge entry_edge;
13211 rtx_insn *seq;
13213 /* In PIC mode, we need to always initialize the PIC register if optimization
13214 is enabled, because we are called from IRA and LRA may later force things
13215 to the constant pool for optimization purposes. */
13216 if (!flag_pic || (!crtl->uses_pic_offset_table && !optimize))
13217 return;
13219 start_sequence ();
13220 load_got_register ();
13221 if (!TARGET_VXWORKS_RTP)
13222 emit_move_insn (pic_offset_table_rtx, got_register_rtx);
13223 seq = get_insns ();
13224 end_sequence ();
13226 entry_edge = single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
13227 insert_insn_on_edge (seq, entry_edge);
13228 commit_one_edge_insertion (entry_edge);
13231 /* Implement TARGET_PREFERRED_RELOAD_CLASS:
13233 - We can't load constants into FP registers.
13234 - We can't load FP constants into integer registers when soft-float,
13235 because there is no soft-float pattern with a r/F constraint.
13236 - We can't load FP constants into integer registers for TFmode unless
13237 it is 0.0L, because there is no movtf pattern with a r/F constraint.
13238 - Try and reload integer constants (symbolic or otherwise) back into
13239 registers directly, rather than having them dumped to memory. */
13241 static reg_class_t
13242 sparc_preferred_reload_class (rtx x, reg_class_t rclass)
13244 machine_mode mode = GET_MODE (x);
13245 if (CONSTANT_P (x))
13247 if (FP_REG_CLASS_P (rclass)
13248 || rclass == GENERAL_OR_FP_REGS
13249 || rclass == GENERAL_OR_EXTRA_FP_REGS
13250 || (GET_MODE_CLASS (mode) == MODE_FLOAT && ! TARGET_FPU)
13251 || (mode == TFmode && ! const_zero_operand (x, mode)))
13252 return NO_REGS;
13254 if (GET_MODE_CLASS (mode) == MODE_INT)
13255 return GENERAL_REGS;
13257 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
13259 if (! FP_REG_CLASS_P (rclass)
13260 || !(const_zero_operand (x, mode)
13261 || const_all_ones_operand (x, mode)))
13262 return NO_REGS;
13266 if (TARGET_VIS3
13267 && ! TARGET_ARCH64
13268 && (rclass == EXTRA_FP_REGS
13269 || rclass == GENERAL_OR_EXTRA_FP_REGS))
13271 int regno = true_regnum (x);
13273 if (SPARC_INT_REG_P (regno))
13274 return (rclass == EXTRA_FP_REGS
13275 ? FP_REGS : GENERAL_OR_FP_REGS);
13278 return rclass;

/* Output a wide multiply instruction in V8+ mode.  INSN is the instruction,
   OPERANDS are its operands and OPCODE is the mnemonic to be used.  */

const char *
output_v8plus_mult (rtx_insn *insn, rtx *operands, const char *opcode)
{
  char mulstr[32];

  gcc_assert (! TARGET_ARCH64);

  if (sparc_check_64 (operands[1], insn) <= 0)
    output_asm_insn ("srl\t%L1, 0, %L1", operands);
  if (which_alternative == 1)
    output_asm_insn ("sllx\t%H1, 32, %H1", operands);
  if (GET_CODE (operands[2]) == CONST_INT)
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%2, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%2, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  else if (rtx_equal_p (operands[1], operands[2]))
    {
      if (which_alternative == 1)
        {
          output_asm_insn ("or\t%L1, %H1, %H1", operands);
          sprintf (mulstr, "%s\t%%H1, %%H1, %%L0", opcode);
          output_asm_insn (mulstr, operands);
          return "srlx\t%L0, 32, %H0";
        }
      else
        {
          output_asm_insn ("sllx\t%H1, 32, %3", operands);
          output_asm_insn ("or\t%L1, %3, %3", operands);
          sprintf (mulstr, "%s\t%%3, %%3, %%3", opcode);
          output_asm_insn (mulstr, operands);
          output_asm_insn ("srlx\t%3, 32, %H0", operands);
          return "mov\t%3, %L0";
        }
    }
  if (sparc_check_64 (operands[2], insn) <= 0)
    output_asm_insn ("srl\t%L2, 0, %L2", operands);
  if (which_alternative == 1)
    {
      output_asm_insn ("or\t%L1, %H1, %H1", operands);
      output_asm_insn ("sllx\t%H2, 32, %L1", operands);
      output_asm_insn ("or\t%L2, %L1, %L1", operands);
      sprintf (mulstr, "%s\t%%H1, %%L1, %%L0", opcode);
      output_asm_insn (mulstr, operands);
      return "srlx\t%L0, 32, %H0";
    }
  else
    {
      output_asm_insn ("sllx\t%H1, 32, %3", operands);
      output_asm_insn ("sllx\t%H2, 32, %4", operands);
      output_asm_insn ("or\t%L1, %3, %3", operands);
      output_asm_insn ("or\t%L2, %4, %4", operands);
      sprintf (mulstr, "%s\t%%3, %%4, %%3", opcode);
      output_asm_insn (mulstr, operands);
      output_asm_insn ("srlx\t%3, 32, %H0", operands);
      return "mov\t%3, %L0";
    }
}

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT by means of VIS2 BSHUFFLE insn.  MODE
   and INNER_MODE are the modes describing TARGET.  */

static void
vector_init_bshuffle (rtx target, rtx elt, machine_mode mode,
		      machine_mode inner_mode)
{
  rtx t1, final_insn, sel;
  int bmask;

  t1 = gen_reg_rtx (mode);

  elt = convert_modes (SImode, inner_mode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  switch (mode)
    {
    case E_V2SImode:
      final_insn = gen_bshufflev2si_vis (target, t1, t1);
      bmask = 0x45674567;
      break;
    case E_V4HImode:
      final_insn = gen_bshufflev4hi_vis (target, t1, t1);
      bmask = 0x67676767;
      break;
    case E_V8QImode:
      final_insn = gen_bshufflev8qi_vis (target, t1, t1);
      bmask = 0x77777777;
      break;
    default:
      gcc_unreachable ();
    }

  sel = force_reg (SImode, GEN_INT (bmask));
  emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
  emit_insn (final_insn);
}
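
/* Explanatory sketch: BSHUFFLE selects each result byte from the 16-byte
   concatenation of its two sources, indexed by the nibbles of GSR.bmask
   (set here via BMASK, whose integer result is discarded).  Both sources
   are T1, whose low 32 bits hold ELT, so bmask 0x77777777 replicates
   byte 7 (a QImode ELT) into all eight lanes, 0x67676767 replicates
   bytes 6-7 (an HImode ELT), and 0x45674567 bytes 4-7 (an SImode ELT).  */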

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V8QI by means of VIS FPMERGE insn.  */

static void
vector_init_fpmerge (rtx target, rtx elt)
{
  rtx t1, t2, t2_low, t3, t3_low;

  t1 = gen_reg_rtx (V4QImode);
  elt = convert_modes (SImode, QImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  t2 = gen_reg_rtx (V8QImode);
  t2_low = gen_lowpart (V4QImode, t2);
  emit_insn (gen_fpmerge_vis (t2, t1, t1));

  t3 = gen_reg_rtx (V8QImode);
  t3_low = gen_lowpart (V4QImode, t3);
  emit_insn (gen_fpmerge_vis (t3, t2_low, t2_low));

  emit_insn (gen_fpmerge_vis (target, t3_low, t3_low));
}
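
/* Explanatory sketch: FPMERGE interleaves the bytes of its two V4QI
   sources.  With T1 = { 0, 0, 0, E }, the first merge yields T2 =
   { 0,0, 0,0, 0,0, E,E } whose low half is { 0, 0, E, E }; the second
   yields T3 whose low half is { E, E, E, E }; the final merge fills all
   eight lanes of TARGET with E.  Each step doubles the run of copies.  */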

/* Subroutine of sparc_expand_vector_init.  Emit code to initialize
   all fields of TARGET to ELT in V4HI by means of VIS FALIGNDATA insn.  */

static void
vector_init_faligndata (rtx target, rtx elt)
{
  rtx t1 = gen_reg_rtx (V4HImode);
  int i;

  elt = convert_modes (SImode, HImode, elt, true);
  emit_move_insn (gen_lowpart (SImode, t1), elt);

  emit_insn (gen_alignaddrsi_vis (gen_reg_rtx (SImode),
				  force_reg (SImode, GEN_INT (6)),
				  const0_rtx));

  for (i = 0; i < 4; i++)
    emit_insn (gen_faligndatav4hi_vis (target, t1, target));
}
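
/* Explanatory sketch: the ALIGNADDR sets GSR.align to 6, so each
   FALIGNDATA extracts bytes 6..13 of the 16-byte concatenation
   T1:TARGET, i.e. the HImode ELT (bytes 6-7 of T1) followed by the
   first six bytes of TARGET.  Every iteration thus shifts another copy
   of ELT in from the left; after four, all four lanes hold ELT.  */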

/* Emit code to initialize TARGET to values for individual fields VALS.  */

void
sparc_expand_vector_init (rtx target, rtx vals)
{
  const machine_mode mode = GET_MODE (target);
  const machine_mode inner_mode = GET_MODE_INNER (mode);
  const int n_elts = GET_MODE_NUNITS (mode);
  int i, n_var = 0;
  bool all_same = true;
  rtx mem;

  for (i = 0; i < n_elts; i++)
    {
      rtx x = XVECEXP (vals, 0, i);
      if (!(CONST_SCALAR_INT_P (x) || CONST_DOUBLE_P (x) || CONST_FIXED_P (x)))
	n_var++;

      if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
	all_same = false;
    }

  if (n_var == 0)
    {
      emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
      return;
    }

  if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (mode))
    {
      if (GET_MODE_SIZE (inner_mode) == 4)
	{
	  emit_move_insn (gen_lowpart (SImode, target),
			  gen_lowpart (SImode, XVECEXP (vals, 0, 0)));
	  return;
	}
      else if (GET_MODE_SIZE (inner_mode) == 8)
	{
	  emit_move_insn (gen_lowpart (DImode, target),
			  gen_lowpart (DImode, XVECEXP (vals, 0, 0)));
	  return;
	}
    }
  else if (GET_MODE_SIZE (inner_mode) == GET_MODE_SIZE (word_mode)
	   && GET_MODE_SIZE (mode) == 2 * GET_MODE_SIZE (word_mode))
    {
      emit_move_insn (gen_highpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 0)));
      emit_move_insn (gen_lowpart (word_mode, target),
		      gen_lowpart (word_mode, XVECEXP (vals, 0, 1)));
      return;
    }

  if (all_same && GET_MODE_SIZE (mode) == 8)
    {
      if (TARGET_VIS2)
	{
	  vector_init_bshuffle (target, XVECEXP (vals, 0, 0), mode, inner_mode);
	  return;
	}
      if (mode == V8QImode)
	{
	  vector_init_fpmerge (target, XVECEXP (vals, 0, 0));
	  return;
	}
      if (mode == V4HImode)
	{
	  vector_init_faligndata (target, XVECEXP (vals, 0, 0));
	  return;
	}
    }

  mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
  for (i = 0; i < n_elts; i++)
    emit_move_insn (adjust_address_nv (mem, inner_mode,
				       i * GET_MODE_SIZE (inner_mode)),
		    XVECEXP (vals, 0, i));
  emit_move_insn (target, mem);
}
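
/* For example, a V4HI vector with several distinct variable elements
   takes the fallback path above: each element is stored into its slot
   of a stack temporary and the whole vector is then loaded back in one
   move.  */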

/* Implement TARGET_SECONDARY_RELOAD.  */

static reg_class_t
sparc_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
			machine_mode mode, secondary_reload_info *sri)
{
  enum reg_class rclass = (enum reg_class) rclass_i;

  sri->icode = CODE_FOR_nothing;
  sri->extra_cost = 0;

  /* We need a temporary when loading/storing a HImode/QImode value
     between memory and the FPU registers.  This can happen when combine puts
     a paradoxical subreg in a float/fix conversion insn.  */
  if (FP_REG_CLASS_P (rclass)
      && (mode == HImode || mode == QImode)
      && (GET_CODE (x) == MEM
	  || ((GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
	      && true_regnum (x) == -1)))
    return GENERAL_REGS;

  /* On 32-bit we need a temporary when loading/storing a DFmode value
     between unaligned memory and the upper FPU registers.  */
  if (TARGET_ARCH32
      && rclass == EXTRA_FP_REGS
      && mode == DFmode
      && GET_CODE (x) == MEM
      && ! mem_min_alignment (x, 8))
    return FP_REGS;

  if (((TARGET_CM_MEDANY
	&& symbolic_operand (x, mode))
       || (TARGET_CM_EMBMEDANY
	   && text_segment_operand (x, mode)))
      && ! flag_pic)
    {
      if (in_p)
	sri->icode = direct_optab_handler (reload_in_optab, mode);
      else
	sri->icode = direct_optab_handler (reload_out_optab, mode);
      return NO_REGS;
    }

  if (TARGET_VIS3 && TARGET_ARCH32)
    {
      int regno = true_regnum (x);

      /* When using VIS3 fp<-->int register moves, on 32-bit we have
	 to move 8-byte values in 4-byte pieces.  This only works via
	 FP_REGS, and not via EXTRA_FP_REGS.  Therefore if we try to
	 move between EXTRA_FP_REGS and GENERAL_REGS, we will need
	 an FP_REGS intermediate move.  */
      if ((rclass == EXTRA_FP_REGS && SPARC_INT_REG_P (regno))
	  || ((general_or_i64_p (rclass)
	       || rclass == GENERAL_OR_FP_REGS)
	      && SPARC_FP_REG_P (regno)))
	{
	  sri->extra_cost = 2;
	  return FP_REGS;
	}
    }

  return NO_REGS;
}
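
/* An illustration (assuming the standard VIS3 moves): on 32-bit,
   reloading a DImode value from an integer register pair into %f32
   (EXTRA_FP_REGS) cannot be done directly, because the 4-byte fp<-->int
   moves only reach the single-precision registers %f0-%f31; the reload
   therefore goes through an FP_REGS intermediate such as %f0/%f1.  */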

/* Implement TARGET_SECONDARY_MEMORY_NEEDED.

   On SPARC when not VIS3 it is not possible to directly move data
   between GENERAL_REGS and FP_REGS.  */

static bool
sparc_secondary_memory_needed (machine_mode mode, reg_class_t class1,
			       reg_class_t class2)
{
  return ((FP_REG_CLASS_P (class1) != FP_REG_CLASS_P (class2))
	  && (! TARGET_VIS3
	      || GET_MODE_SIZE (mode) > 8
	      || GET_MODE_SIZE (mode) < 4));
}
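
/* For example, without VIS3 a DImode move between %o0/%o1 and %f0 must
   go through a stack slot; with VIS3, only values narrower than 4 bytes
   or wider than 8 bytes (e.g. TFmode) still need the memory detour.  */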

/* Implement TARGET_SECONDARY_MEMORY_NEEDED_MODE.

   get_secondary_mem widens its argument to BITS_PER_WORD which loses on v9
   because the movsi and movsf patterns don't handle r/f moves.
   For v8 we copy the default definition.  */

static machine_mode
sparc_secondary_memory_needed_mode (machine_mode mode)
{
  if (TARGET_ARCH64)
    {
      if (GET_MODE_BITSIZE (mode) < 32)
	return mode_for_size (32, GET_MODE_CLASS (mode), 0).require ();
      return mode;
    }
  else
    {
      if (GET_MODE_BITSIZE (mode) < BITS_PER_WORD)
	return mode_for_size (BITS_PER_WORD,
			      GET_MODE_CLASS (mode), 0).require ();
      return mode;
    }
}
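
/* For example, on 64-bit a QImode secondary reload gets an SImode stack
   slot, which the movsi pattern can handle for r/f moves, instead of
   the DImode slot that the default BITS_PER_WORD widening would pick.  */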

/* Emit code to conditionally move either OPERANDS[2] or OPERANDS[3] into
   OPERANDS[0] in MODE.  OPERANDS[1] is the operator of the condition.  */

bool
sparc_expand_conditional_move (machine_mode mode, rtx *operands)
{
  enum rtx_code rc = GET_CODE (operands[1]);
  machine_mode cmp_mode;
  rtx cc_reg, dst, cmp;

  cmp = operands[1];
  if (GET_MODE (XEXP (cmp, 0)) == DImode && !TARGET_ARCH64)
    return false;

  if (GET_MODE (XEXP (cmp, 0)) == TFmode && !TARGET_HARD_QUAD)
    cmp = sparc_emit_float_lib_cmp (XEXP (cmp, 0), XEXP (cmp, 1), rc);

  cmp_mode = GET_MODE (XEXP (cmp, 0));
  rc = GET_CODE (cmp);

  dst = operands[0];
  if (! rtx_equal_p (operands[2], dst)
      && ! rtx_equal_p (operands[3], dst))
    {
      if (reg_overlap_mentioned_p (dst, cmp))
	dst = gen_reg_rtx (mode);

      emit_move_insn (dst, operands[3]);
    }
  else if (operands[2] == dst)
    {
      operands[2] = operands[3];

      if (GET_MODE_CLASS (cmp_mode) == MODE_FLOAT)
	rc = reverse_condition_maybe_unordered (rc);
      else
	rc = reverse_condition (rc);
    }

  if (XEXP (cmp, 1) == const0_rtx
      && GET_CODE (XEXP (cmp, 0)) == REG
      && cmp_mode == DImode
      && v9_regcmp_p (rc))
    cc_reg = XEXP (cmp, 0);
  else
    cc_reg = gen_compare_reg_1 (rc, XEXP (cmp, 0), XEXP (cmp, 1));

  cmp = gen_rtx_fmt_ee (rc, GET_MODE (cc_reg), cc_reg, const0_rtx);

  emit_insn (gen_rtx_SET (dst,
			  gen_rtx_IF_THEN_ELSE (mode, cmp, operands[2], dst)));

  if (dst != operands[0])
    emit_move_insn (operands[0], dst);

  return true;
}
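
/* For example, if the request is "dst = cond ? dst : z", no conditional
   move writing dst on the true arm exists, so the code above substitutes
   z as the source and reverses the condition (using the unordered-aware
   reversal for FP comparisons) to get "dst = !cond ? z : dst".  */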

/* Emit code to conditionally move a combination of OPERANDS[1] and OPERANDS[2]
   into OPERANDS[0] in MODE, depending on the outcome of the comparison of
   OPERANDS[4] and OPERANDS[5].  OPERANDS[3] is the operator of the condition.
   FCODE is the machine code to be used for OPERANDS[3] and CCODE the machine
   code to be used for the condition mask.  */

void
sparc_expand_vcond (machine_mode mode, rtx *operands, int ccode, int fcode)
{
  enum rtx_code code = signed_condition (GET_CODE (operands[3]));
  rtx mask, cop0, cop1, fcmp, cmask, bshuf, gsr;

  mask = gen_reg_rtx (Pmode);
  cop0 = operands[4];
  cop1 = operands[5];
  if (code == LT || code == GE)
    {
      code = swap_condition (code);
      std::swap (cop0, cop1);
    }

  gsr = gen_rtx_REG (DImode, SPARC_GSR_REG);

  fcmp = gen_rtx_UNSPEC (Pmode,
			 gen_rtvec (1, gen_rtx_fmt_ee (code, mode, cop0, cop1)),
			 fcode);

  cmask = gen_rtx_UNSPEC (DImode,
			  gen_rtvec (2, mask, gsr),
			  ccode);

  bshuf = gen_rtx_UNSPEC (mode,
			  gen_rtvec (3, operands[1], operands[2], gsr),
			  UNSPEC_BSHUFFLE);

  emit_insn (gen_rtx_SET (mask, fcmp));
  emit_insn (gen_rtx_SET (gsr, cmask));

  emit_insn (gen_rtx_SET (operands[0], bshuf));
}

/* On the SPARC, any mode which naturally allocates into the single float
   registers should return 4 here.  */

unsigned int
sparc_regmode_natural_size (machine_mode mode)
{
  const enum mode_class cl = GET_MODE_CLASS (mode);

  if ((cl == MODE_FLOAT || cl == MODE_VECTOR_INT) && GET_MODE_SIZE (mode) <= 4)
    return 4;

  return UNITS_PER_WORD;
}

/* Implement TARGET_HARD_REGNO_NREGS.

   On SPARC, ordinary registers hold 32 bits worth; this means both
   integer and floating point registers.  On v9, integer regs hold 64
   bits worth; floating point regs hold 32 bits worth (this includes the
   new fp regs as even the odd ones are included in the hard register
   count).  */

static unsigned int
sparc_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
  if (regno == SPARC_GSR_REG)
    return 1;
  if (TARGET_ARCH64)
    {
      if (SPARC_INT_REG_P (regno) || regno == FRAME_POINTER_REGNUM)
	return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
      return CEIL (GET_MODE_SIZE (mode), 4);
    }
  return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
}
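
/* For example, on V9 a DImode value occupies a single integer register
   but two consecutive floating-point registers, since each FP register
   is counted as 32 bits wide.  */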

/* Implement TARGET_HARD_REGNO_MODE_OK.

   ??? Because of the funny way we pass parameters we should allow certain
   ??? types of float/complex values to be in integer registers during
   ??? RTL generation.  This only matters on arch32.  */

static bool
sparc_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
  return (hard_regno_mode_classes[regno] & sparc_mode_class[mode]) != 0;
}

/* Implement TARGET_MODES_TIEABLE_P.

   For V9 we have to deal with the fact that only the lower 32 floating
   point registers are 32-bit addressable.  */

static bool
sparc_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
  enum mode_class mclass1, mclass2;
  unsigned short size1, size2;

  if (mode1 == mode2)
    return true;

  mclass1 = GET_MODE_CLASS (mode1);
  mclass2 = GET_MODE_CLASS (mode2);
  if (mclass1 != mclass2)
    return false;

  if (! TARGET_V9)
    return true;

  /* Classes are the same and we are V9 so we have to deal with upper
     vs. lower floating point registers.  If one of the modes is a
     4-byte mode, and the other is not, we have to mark them as not
     tieable because only the lower 32 floating point registers are
     addressable 32 bits at a time.

     We can't just test explicitly for SFmode, otherwise we won't
     cover the vector mode cases properly.  */

  if (mclass1 != MODE_FLOAT && mclass1 != MODE_VECTOR_INT)
    return true;

  size1 = GET_MODE_SIZE (mode1);
  size2 = GET_MODE_SIZE (mode2);
  if ((size1 > 4 && size2 == 4)
      || (size2 > 4 && size1 == 4))
    return false;

  return true;
}
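
/* For example, on V9 SFmode and DFmode are not tieable: a DFmode value
   may live in %f32-%f62, where no 4-byte (SFmode) access exists.  */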

/* Implement TARGET_CSTORE_MODE.  */

static scalar_int_mode
sparc_cstore_mode (enum insn_code icode ATTRIBUTE_UNUSED)
{
  return (TARGET_ARCH64 ? DImode : SImode);
}

/* Return the compound expression made of T1 and T2.  */

static inline tree
compound_expr (tree t1, tree t2)
{
  return build2 (COMPOUND_EXPR, void_type_node, t1, t2);
}

/* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook.  */

static void
sparc_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
{
  if (!TARGET_FPU)
    return;

  const unsigned HOST_WIDE_INT accrued_exception_mask = 0x1f << 5;
  const unsigned HOST_WIDE_INT trap_enable_mask = 0x1f << 23;
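
  /* These masks should correspond to the accrued-exception (aexc) field,
     bits 5-9, and the trap-enable mask (TEM) field, bits 23-27, of the
     SPARC %fsr register.  */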

  /* We generate the equivalent of feholdexcept (&fenv_var):

       unsigned int fenv_var;
       __builtin_store_fsr (&fenv_var);

       unsigned int tmp1_var;
       tmp1_var = fenv_var & ~(accrued_exception_mask | trap_enable_mask);

       __builtin_load_fsr (&tmp1_var);  */

  tree fenv_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (fenv_var) = 1;
  tree fenv_addr = build_fold_addr_expr (fenv_var);
  tree stfsr = sparc_builtins[SPARC_BUILTIN_STFSR];
  tree hold_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
	      build_call_expr (stfsr, 1, fenv_addr), NULL_TREE, NULL_TREE);

  tree tmp1_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp1_var) = 1;
  tree masked_fenv_var
    = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
	      build_int_cst (unsigned_type_node,
			     ~(accrued_exception_mask | trap_enable_mask)));
  tree hold_mask
    = build4 (TARGET_EXPR, unsigned_type_node, tmp1_var, masked_fenv_var,
	      NULL_TREE, NULL_TREE);

  tree tmp1_addr = build_fold_addr_expr (tmp1_var);
  tree ldfsr = sparc_builtins[SPARC_BUILTIN_LDFSR];
  tree hold_ldfsr = build_call_expr (ldfsr, 1, tmp1_addr);

  *hold = compound_expr (compound_expr (hold_stfsr, hold_mask), hold_ldfsr);

  /* We reload the value of tmp1_var to clear the exceptions:

       __builtin_load_fsr (&tmp1_var);  */

  *clear = build_call_expr (ldfsr, 1, tmp1_addr);

  /* We generate the equivalent of feupdateenv (&fenv_var):

       unsigned int tmp2_var;
       __builtin_store_fsr (&tmp2_var);

       __builtin_load_fsr (&fenv_var);

       if (SPARC_LOW_FE_EXCEPT_VALUES)
	 tmp2_var >>= 5;
       __atomic_feraiseexcept ((int) tmp2_var);  */

  tree tmp2_var = create_tmp_var_raw (unsigned_type_node);
  TREE_ADDRESSABLE (tmp2_var) = 1;
  tree tmp2_addr = build_fold_addr_expr (tmp2_var);
  tree update_stfsr
    = build4 (TARGET_EXPR, unsigned_type_node, tmp2_var,
	      build_call_expr (stfsr, 1, tmp2_addr), NULL_TREE, NULL_TREE);

  tree update_ldfsr = build_call_expr (ldfsr, 1, fenv_addr);

  tree atomic_feraiseexcept
    = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
  tree update_call
    = build_call_expr (atomic_feraiseexcept, 1,
		       fold_convert (integer_type_node, tmp2_var));

  if (SPARC_LOW_FE_EXCEPT_VALUES)
    {
      tree shifted_tmp2_var
	= build2 (RSHIFT_EXPR, unsigned_type_node, tmp2_var,
		  build_int_cst (unsigned_type_node, 5));
      tree update_shift
	= build2 (MODIFY_EXPR, void_type_node, tmp2_var, shifted_tmp2_var);
      update_call = compound_expr (update_shift, update_call);
    }

  *update
    = compound_expr (compound_expr (update_stfsr, update_ldfsr), update_call);
}

/* Implement TARGET_CAN_CHANGE_MODE_CLASS.  Borrowed from the PA port.

   SImode loads to floating-point registers are not zero-extended.
   The definition for LOAD_EXTEND_OP specifies that integer loads
   narrower than BITS_PER_WORD will be zero-extended.  As a result,
   we inhibit changes from SImode unless they are to a mode that is
   identical in size.

   Likewise for SFmode, since word-mode paradoxical subregs are
   problematic on big-endian architectures.  */

static bool
sparc_can_change_mode_class (machine_mode from, machine_mode to,
			     reg_class_t rclass)
{
  if (TARGET_ARCH64
      && GET_MODE_SIZE (from) == 4
      && GET_MODE_SIZE (to) != 4)
    return !reg_classes_intersect_p (rclass, FP_REGS);
  return true;
}
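
/* For example, on 64-bit this rejects reading an SImode value that lives
   in a floating-point register through a (subreg:DI ...): the 32-bit FP
   load that produced it did not zero-extend, so the upper half of the
   register is not the zero bits LOAD_EXTEND_OP promises.  */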

/* Implement TARGET_CONSTANT_ALIGNMENT.  */

static HOST_WIDE_INT
sparc_constant_alignment (const_tree exp, HOST_WIDE_INT align)
{
  if (TREE_CODE (exp) == STRING_CST)
    return MAX (align, FASTEST_ALIGNMENT);
  return align;
}

/* Implement TARGET_ZERO_CALL_USED_REGS.

   Generate a sequence of instructions that zero registers specified by
   NEED_ZEROED_HARDREGS.  Return the ZEROED_HARDREGS that are actually
   zeroed.  */

static HARD_REG_SET
sparc_zero_call_used_regs (HARD_REG_SET need_zeroed_hardregs)
{
  for (unsigned int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
    if (TEST_HARD_REG_BIT (need_zeroed_hardregs, regno))
      {
	/* Do not touch the CC registers or the FP registers if no VIS.  */
	if (regno >= SPARC_FCC_REG
	    || (regno >= SPARC_FIRST_FP_REG && !TARGET_VIS))
	  CLEAR_HARD_REG_BIT (need_zeroed_hardregs, regno);

	/* Do not access the odd upper FP registers individually.  */
	else if (regno >= SPARC_FIRST_V9_FP_REG && (regno & 1))
	  ;

	/* Use the most natural mode for the registers, which is not given by
	   regno_reg_rtx/reg_raw_mode for the FP registers on the SPARC.  */
	else
	  {
	    machine_mode mode;
	    rtx reg;

	    if (regno < SPARC_FIRST_FP_REG)
	      {
		reg = regno_reg_rtx[regno];
		mode = GET_MODE (reg);
	      }
	    else
	      {
		mode = regno < SPARC_FIRST_V9_FP_REG ? SFmode : DFmode;
		reg = gen_raw_REG (mode, regno);
	      }

	    emit_move_insn (reg, CONST0_RTX (mode));
	  }
      }

  return need_zeroed_hardregs;
}

/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE.  Return TFmode or DFmode
   for TI_LONG_DOUBLE_TYPE and the default for others.  */

static machine_mode
sparc_c_mode_for_floating_type (enum tree_index ti)
{
  if (ti == TI_LONG_DOUBLE_TYPE)
    return SPARC_LONG_DOUBLE_TYPE_SIZE == 128 ? TFmode : DFmode;
  return default_mode_for_floating_type (ti);
}

#include "gt-sparc.h"