[official-gcc.git] / gcc / config / spu / spu.c
1 /* Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
27 #include "flags.h"
28 #include "recog.h"
29 #include "obstack.h"
30 #include "alias.h"
31 #include "symtab.h"
32 #include "tree.h"
33 #include "fold-const.h"
34 #include "stringpool.h"
35 #include "stor-layout.h"
36 #include "calls.h"
37 #include "varasm.h"
38 #include "function.h"
39 #include "expmed.h"
40 #include "dojump.h"
41 #include "explow.h"
42 #include "emit-rtl.h"
43 #include "stmt.h"
44 #include "expr.h"
45 #include "insn-codes.h"
46 #include "optabs.h"
47 #include "except.h"
48 #include "output.h"
49 #include "predict.h"
50 #include "dominance.h"
51 #include "cfg.h"
52 #include "cfgrtl.h"
53 #include "cfganal.h"
54 #include "lcm.h"
55 #include "cfgbuild.h"
56 #include "cfgcleanup.h"
57 #include "basic-block.h"
58 #include "diagnostic-core.h"
59 #include "tm_p.h"
60 #include "target.h"
61 #include "langhooks.h"
62 #include "reload.h"
63 #include "sched-int.h"
64 #include "params.h"
65 #include "tree-ssa-alias.h"
66 #include "internal-fn.h"
67 #include "gimple-fold.h"
68 #include "tree-eh.h"
69 #include "gimple-expr.h"
70 #include "gimple.h"
71 #include "gimplify.h"
72 #include "tm-constrs.h"
73 #include "sbitmap.h"
74 #include "df.h"
75 #include "ddg.h"
76 #include "timevar.h"
77 #include "dumpfile.h"
78 #include "cfgloop.h"
79 #include "builtins.h"
80 #include "rtl-iter.h"
82 #include "target-def.h"
84 /* Builtin types, data and prototypes. */
86 enum spu_builtin_type_index
88 SPU_BTI_END_OF_PARAMS,
90 /* We create new type nodes for these. */
91 SPU_BTI_V16QI,
92 SPU_BTI_V8HI,
93 SPU_BTI_V4SI,
94 SPU_BTI_V2DI,
95 SPU_BTI_V4SF,
96 SPU_BTI_V2DF,
97 SPU_BTI_UV16QI,
98 SPU_BTI_UV8HI,
99 SPU_BTI_UV4SI,
100 SPU_BTI_UV2DI,
102 /* A 16-byte type. (Implemented with V16QI_type_node) */
103 SPU_BTI_QUADWORD,
105 /* These all correspond to intSI_type_node */
106 SPU_BTI_7,
107 SPU_BTI_S7,
108 SPU_BTI_U7,
109 SPU_BTI_S10,
110 SPU_BTI_S10_4,
111 SPU_BTI_U14,
112 SPU_BTI_16,
113 SPU_BTI_S16,
114 SPU_BTI_S16_2,
115 SPU_BTI_U16,
116 SPU_BTI_U16_2,
117 SPU_BTI_U18,
119 /* These correspond to the standard types */
120 SPU_BTI_INTQI,
121 SPU_BTI_INTHI,
122 SPU_BTI_INTSI,
123 SPU_BTI_INTDI,
125 SPU_BTI_UINTQI,
126 SPU_BTI_UINTHI,
127 SPU_BTI_UINTSI,
128 SPU_BTI_UINTDI,
130 SPU_BTI_FLOAT,
131 SPU_BTI_DOUBLE,
133 SPU_BTI_VOID,
134 SPU_BTI_PTR,
136 SPU_BTI_MAX
139 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
140 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
141 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
142 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
143 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
144 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
145 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
146 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
147 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
148 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
150 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
152 struct spu_builtin_range
154 int low, high;
157 static struct spu_builtin_range spu_builtin_range[] = {
158 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
159 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
160 {0ll, 0x7fll}, /* SPU_BTI_U7 */
161 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
162 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
163 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
164 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
165 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
166 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
167 {0ll, 0xffffll}, /* SPU_BTI_U16 */
168 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
169 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
173 /* Target specific attribute specifications. */
174 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
176 /* Prototypes and external defs. */
177 static int get_pipe (rtx_insn *insn);
178 static int spu_naked_function_p (tree func);
179 static int mem_is_padded_component_ref (rtx x);
180 static void fix_range (const char *);
181 static rtx spu_expand_load (rtx, rtx, rtx, int);
183 /* Which instruction set architecture to use. */
184 int spu_arch;
185 /* Which cpu are we tuning for. */
186 int spu_tune;
188 /* The hardware requires 8 insns between a hint and the branch it
189 affects. This variable describes how many rtl instructions the
190 compiler needs to see before inserting a hint, and then the compiler
191 will insert enough nops to make it at least 8 insns. The default is
192 for the compiler to allow up to 2 nops to be emitted. The nops are
193 inserted in pairs, so we round down. */
194 int spu_hint_dist = (8*4) - (2*4);
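/* For example, with the default of 2 nops allowed: 8 insns * 4 bytes
   = 32 bytes, minus 2 nops * 4 bytes = 8 bytes, giving a default
   spu_hint_dist of 24 bytes.  spu_option_override recomputes this
   below from spu_max_nops.  */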
196 enum spu_immediate {
197 SPU_NONE,
198 SPU_IL,
199 SPU_ILA,
200 SPU_ILH,
201 SPU_ILHU,
202 SPU_ORI,
203 SPU_ORHI,
204 SPU_ORBI,
205 SPU_IOHL
207 enum immediate_class
209 IC_POOL, /* constant pool */
210 IC_IL1, /* one il* instruction */
211 IC_IL2, /* both ilhu and iohl instructions */
212 IC_IL1s, /* one il* instruction */
213 IC_IL2s, /* both ilhu and iohl instructions */
214 IC_FSMBI, /* the fsmbi instruction */
215 IC_CPAT, /* one of the c*d instructions */
216 IC_FSMBI2 /* fsmbi plus 1 other instruction */
219 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
220 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
221 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
222 static enum immediate_class classify_immediate (rtx op,
223 machine_mode mode);
225 /* Pointer mode for __ea references. */
226 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
229 /* Define the structure for the machine field in struct function. */
230 struct GTY(()) machine_function
232 /* Register to use for PIC accesses. */
233 rtx pic_reg;
236 /* How to allocate a 'struct machine_function'. */
237 static struct machine_function *
238 spu_init_machine_status (void)
240 return ggc_cleared_alloc<machine_function> ();
243 /* Implement TARGET_OPTION_OVERRIDE. */
244 static void
245 spu_option_override (void)
247 /* Set up function hooks. */
248 init_machine_status = spu_init_machine_status;
250 /* Small loops will be completely peeled at -O3. For SPU it is more
251 important to keep code small by default. */
252 if (!flag_unroll_loops && !flag_peel_loops)
253 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
254 global_options.x_param_values,
255 global_options_set.x_param_values);
257 flag_omit_frame_pointer = 1;
259 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
260 if (align_functions < 8)
261 align_functions = 8;
263 spu_hint_dist = 8*4 - spu_max_nops*4;
264 if (spu_hint_dist < 0)
265 spu_hint_dist = 0;
267 if (spu_fixed_range_string)
268 fix_range (spu_fixed_range_string);
270 /* Determine processor architectural level. */
271 if (spu_arch_string)
273 if (strcmp (&spu_arch_string[0], "cell") == 0)
274 spu_arch = PROCESSOR_CELL;
275 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
276 spu_arch = PROCESSOR_CELLEDP;
277 else
278 error ("bad value (%s) for -march= switch", spu_arch_string);
281 /* Determine processor to tune for. */
282 if (spu_tune_string)
284 if (strcmp (&spu_tune_string[0], "cell") == 0)
285 spu_tune = PROCESSOR_CELL;
286 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
287 spu_tune = PROCESSOR_CELLEDP;
288 else
289 error ("bad value (%s) for -mtune= switch", spu_tune_string);
292 /* Change defaults according to the processor architecture. */
293 if (spu_arch == PROCESSOR_CELLEDP)
295 /* If no command line option has been otherwise specified, change
296 the default to -mno-safe-hints on celledp -- only the original
297 Cell/B.E. processors require this workaround. */
298 if (!(target_flags_explicit & MASK_SAFE_HINTS))
299 target_flags &= ~MASK_SAFE_HINTS;
302 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
305 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
306 struct attribute_spec.handler. */
308 /* True if MODE is valid for the target. By "valid", we mean able to
309 be manipulated in non-trivial ways. In particular, this means all
310 the arithmetic is supported. */
311 static bool
312 spu_scalar_mode_supported_p (machine_mode mode)
314 switch (mode)
316 case QImode:
317 case HImode:
318 case SImode:
319 case SFmode:
320 case DImode:
321 case TImode:
322 case DFmode:
323 return true;
325 default:
326 return false;
330 /* Similarly for vector modes. "Supported" here is less strict. At
331 least some operations are supported; need to check optabs or builtins
332 for further details. */
333 static bool
334 spu_vector_mode_supported_p (machine_mode mode)
336 switch (mode)
338 case V16QImode:
339 case V8HImode:
340 case V4SImode:
341 case V2DImode:
342 case V4SFmode:
343 case V2DFmode:
344 return true;
346 default:
347 return false;
351 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
352 least significant bytes of the outer mode. This function returns
353 TRUE for the SUBREG's where this is correct. */
355 valid_subreg (rtx op)
357 machine_mode om = GET_MODE (op);
358 machine_mode im = GET_MODE (SUBREG_REG (op));
359 return om != VOIDmode && im != VOIDmode
360 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
361 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
362 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
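/* For example, (subreg:HI (reg:SI)) is accepted because both modes fit
   in a single 32-bit slot, while a paradoxical (subreg:TI (reg:SI)) is
   rejected: on SPU the 4-byte value lives in the preferred (leftmost)
   slot of the quadword register, not in the low-order bytes that the
   SUBREG would imply.  */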
365 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
366 and adjust the start offset. */
367 static rtx
368 adjust_operand (rtx op, HOST_WIDE_INT * start)
370 machine_mode mode;
371 int op_size;
372 /* Strip any paradoxical SUBREG. */
373 if (GET_CODE (op) == SUBREG
374 && (GET_MODE_BITSIZE (GET_MODE (op))
375 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
377 if (start)
378 *start -=
379 GET_MODE_BITSIZE (GET_MODE (op)) -
380 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
381 op = SUBREG_REG (op);
383 /* If it is smaller than SI, widen it to SI with a SUBREG. */
384 op_size = GET_MODE_BITSIZE (GET_MODE (op));
385 if (op_size < 32)
387 if (start)
388 *start += 32 - op_size;
389 op_size = 32;
391 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
392 mode = mode_for_size (op_size, MODE_INT, 0);
393 if (mode != GET_MODE (op))
394 op = gen_rtx_SUBREG (mode, op, 0);
395 return op;
398 void
399 spu_expand_extv (rtx ops[], int unsignedp)
401 rtx dst = ops[0], src = ops[1];
402 HOST_WIDE_INT width = INTVAL (ops[2]);
403 HOST_WIDE_INT start = INTVAL (ops[3]);
404 HOST_WIDE_INT align_mask;
405 rtx s0, s1, mask, r0;
407 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
409 if (MEM_P (src))
411 /* First, determine if we need 1 TImode load or 2. We need only 1
412 if the bits being extracted do not cross the alignment boundary
413 as determined by the MEM and its address. */
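/* For example, with MEM_ALIGN of 128 bits, extracting 16 bits starting
   at bit 120 spans bits 120..135 and crosses the 128-bit boundary, so
   two quadword loads are required; the same extraction starting at bit
   96 stays within one quadword and needs only one load.  */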
415 align_mask = -MEM_ALIGN (src);
416 if ((start & align_mask) == ((start + width - 1) & align_mask))
418 /* Alignment is sufficient for 1 load. */
419 s0 = gen_reg_rtx (TImode);
420 r0 = spu_expand_load (s0, 0, src, start / 8);
421 start &= 7;
422 if (r0)
423 emit_insn (gen_rotqby_ti (s0, s0, r0));
425 else
427 /* Need 2 loads. */
428 s0 = gen_reg_rtx (TImode);
429 s1 = gen_reg_rtx (TImode);
430 r0 = spu_expand_load (s0, s1, src, start / 8);
431 start &= 7;
433 gcc_assert (start + width <= 128);
434 if (r0)
436 rtx r1 = gen_reg_rtx (SImode);
437 mask = gen_reg_rtx (TImode);
438 emit_move_insn (mask, GEN_INT (-1));
439 emit_insn (gen_rotqby_ti (s0, s0, r0));
440 emit_insn (gen_rotqby_ti (s1, s1, r0));
441 if (GET_CODE (r0) == CONST_INT)
442 r1 = GEN_INT (INTVAL (r0) & 15);
443 else
444 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
445 emit_insn (gen_shlqby_ti (mask, mask, r1));
446 emit_insn (gen_selb (s0, s1, s0, mask));
451 else if (GET_CODE (src) == SUBREG)
453 rtx r = SUBREG_REG (src);
454 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
455 s0 = gen_reg_rtx (TImode);
456 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
457 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
458 else
459 emit_move_insn (s0, src);
461 else
463 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
464 s0 = gen_reg_rtx (TImode);
465 emit_move_insn (s0, src);
468 /* Now s0 is TImode and contains the bits to extract at start. */
470 if (start)
471 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
473 if (128 - width)
474 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
476 emit_move_insn (dst, s0);
479 void
480 spu_expand_insv (rtx ops[])
482 HOST_WIDE_INT width = INTVAL (ops[1]);
483 HOST_WIDE_INT start = INTVAL (ops[2]);
484 HOST_WIDE_INT maskbits;
485 machine_mode dst_mode;
486 rtx dst = ops[0], src = ops[3];
487 int dst_size;
488 rtx mask;
489 rtx shift_reg;
490 int shift;
493 if (GET_CODE (ops[0]) == MEM)
494 dst = gen_reg_rtx (TImode);
495 else
496 dst = adjust_operand (dst, &start);
497 dst_mode = GET_MODE (dst);
498 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
500 if (CONSTANT_P (src))
502 machine_mode m =
503 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
504 src = force_reg (m, convert_to_mode (m, src, 0));
506 src = adjust_operand (src, 0);
508 mask = gen_reg_rtx (dst_mode);
509 shift_reg = gen_reg_rtx (dst_mode);
510 shift = dst_size - start - width;
512 /* It's not safe to use subreg here because the compiler assumes
513 that the SUBREG_REG is right justified in the SUBREG. */
514 convert_move (shift_reg, src, 1);
516 if (shift > 0)
518 switch (dst_mode)
520 case SImode:
521 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
522 break;
523 case DImode:
524 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
525 break;
526 case TImode:
527 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
528 break;
529 default:
530 abort ();
533 else if (shift < 0)
534 abort ();
536 switch (dst_size)
538 case 32:
539 maskbits = (-1ll << (32 - width - start));
540 if (start)
541 maskbits += (1ll << (32 - start));
542 emit_move_insn (mask, GEN_INT (maskbits));
543 break;
544 case 64:
545 maskbits = (-1ll << (64 - width - start));
546 if (start)
547 maskbits += (1ll << (64 - start));
548 emit_move_insn (mask, GEN_INT (maskbits));
549 break;
550 case 128:
552 unsigned char arr[16];
553 int i = start / 8;
554 memset (arr, 0, sizeof (arr));
555 arr[i] = 0xff >> (start & 7);
556 for (i++; i <= (start + width - 1) / 8; i++)
557 arr[i] = 0xff;
558 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
559 emit_move_insn (mask, array_to_constant (TImode, arr));
561 break;
562 default:
563 abort ();
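/* For example, inserting width 8 at start 8 into an SImode destination
   gives maskbits = (-1 << 16) + (1 << 24) = 0x00ff0000 (in 64-bit
   HOST_WIDE_INT arithmetic), selecting exactly the byte being
   replaced.  */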
565 if (GET_CODE (ops[0]) == MEM)
567 rtx low = gen_reg_rtx (SImode);
568 rtx rotl = gen_reg_rtx (SImode);
569 rtx mask0 = gen_reg_rtx (TImode);
570 rtx addr;
571 rtx addr0;
572 rtx addr1;
573 rtx mem;
575 addr = force_reg (Pmode, XEXP (ops[0], 0));
576 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
577 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
578 emit_insn (gen_negsi2 (rotl, low));
579 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
580 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
581 mem = change_address (ops[0], TImode, addr0);
582 set_mem_alias_set (mem, 0);
583 emit_move_insn (dst, mem);
584 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
585 if (start + width > MEM_ALIGN (ops[0]))
587 rtx shl = gen_reg_rtx (SImode);
588 rtx mask1 = gen_reg_rtx (TImode);
589 rtx dst1 = gen_reg_rtx (TImode);
590 rtx mem1;
591 addr1 = plus_constant (Pmode, addr, 16);
592 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
593 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
594 emit_insn (gen_shlqby_ti (mask1, mask, shl));
595 mem1 = change_address (ops[0], TImode, addr1);
596 set_mem_alias_set (mem1, 0);
597 emit_move_insn (dst1, mem1);
598 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
599 emit_move_insn (mem1, dst1);
601 emit_move_insn (mem, dst);
603 else
604 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
609 spu_expand_block_move (rtx ops[])
611 HOST_WIDE_INT bytes, align, offset;
612 rtx src, dst, sreg, dreg, target;
613 int i;
614 if (GET_CODE (ops[2]) != CONST_INT
615 || GET_CODE (ops[3]) != CONST_INT
616 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
617 return 0;
619 bytes = INTVAL (ops[2]);
620 align = INTVAL (ops[3]);
622 if (bytes <= 0)
623 return 1;
625 dst = ops[0];
626 src = ops[1];
628 if (align == 16)
630 for (offset = 0; offset + 16 <= bytes; offset += 16)
632 dst = adjust_address (ops[0], V16QImode, offset);
633 src = adjust_address (ops[1], V16QImode, offset);
634 emit_move_insn (dst, src);
636 if (offset < bytes)
638 rtx mask;
639 unsigned char arr[16] = { 0 };
640 for (i = 0; i < bytes - offset; i++)
641 arr[i] = 0xff;
642 dst = adjust_address (ops[0], V16QImode, offset);
643 src = adjust_address (ops[1], V16QImode, offset);
644 mask = gen_reg_rtx (V16QImode);
645 sreg = gen_reg_rtx (V16QImode);
646 dreg = gen_reg_rtx (V16QImode);
647 target = gen_reg_rtx (V16QImode);
648 emit_move_insn (mask, array_to_constant (V16QImode, arr));
649 emit_move_insn (dreg, dst);
650 emit_move_insn (sreg, src);
651 emit_insn (gen_selb (target, dreg, sreg, mask));
652 emit_move_insn (dst, target);
654 return 1;
656 return 0;
659 enum spu_comp_code
660 { SPU_EQ, SPU_GT, SPU_GTU };
662 int spu_comp_icode[12][3] = {
663 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
664 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
665 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
666 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
667 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
668 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
669 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
670 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
671 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
672 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
673 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
674 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
677 /* Generate a compare for CODE. Return a brand-new rtx that represents
678 the result of the compare. GCC can figure this out too if we don't
679 provide all variations of compares, but since GCC always wants to use
680 WORD_MODE, we can generate better code in most cases by doing it
681 ourselves. */
682 void
683 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
685 int reverse_compare = 0;
686 int reverse_test = 0;
687 rtx compare_result, eq_result;
688 rtx comp_rtx, eq_rtx;
689 machine_mode comp_mode;
690 machine_mode op_mode;
691 enum spu_comp_code scode, eq_code;
692 enum insn_code ior_code;
693 enum rtx_code code = GET_CODE (cmp);
694 rtx op0 = XEXP (cmp, 0);
695 rtx op1 = XEXP (cmp, 1);
696 int index;
697 int eq_test = 0;
699 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
700 and so on, to keep the constant in operand 1. */
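/* E.g. (x >= 5) becomes (x > 4): GT maps directly onto the cgt
   patterns, and the constant stays in the second operand where the
   immediate forms can use it.  */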
701 if (GET_CODE (op1) == CONST_INT)
703 HOST_WIDE_INT val = INTVAL (op1) - 1;
704 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
705 switch (code)
707 case GE:
708 op1 = GEN_INT (val);
709 code = GT;
710 break;
711 case LT:
712 op1 = GEN_INT (val);
713 code = LE;
714 break;
715 case GEU:
716 op1 = GEN_INT (val);
717 code = GTU;
718 break;
719 case LTU:
720 op1 = GEN_INT (val);
721 code = LEU;
722 break;
723 default:
724 break;
728 /* However, if we generate an integer result, performing a reverse test
729 would require an extra negation, so avoid that where possible. */
730 if (GET_CODE (op1) == CONST_INT && is_set == 1)
732 HOST_WIDE_INT val = INTVAL (op1) + 1;
733 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
734 switch (code)
736 case LE:
737 op1 = GEN_INT (val);
738 code = LT;
739 break;
740 case LEU:
741 op1 = GEN_INT (val);
742 code = LTU;
743 break;
744 default:
745 break;
749 comp_mode = SImode;
750 op_mode = GET_MODE (op0);
752 switch (code)
754 case GE:
755 scode = SPU_GT;
756 if (HONOR_NANS (op_mode))
758 reverse_compare = 0;
759 reverse_test = 0;
760 eq_test = 1;
761 eq_code = SPU_EQ;
763 else
765 reverse_compare = 1;
766 reverse_test = 1;
768 break;
769 case LE:
770 scode = SPU_GT;
771 if (HONOR_NANS (op_mode))
773 reverse_compare = 1;
774 reverse_test = 0;
775 eq_test = 1;
776 eq_code = SPU_EQ;
778 else
780 reverse_compare = 0;
781 reverse_test = 1;
783 break;
784 case LT:
785 reverse_compare = 1;
786 reverse_test = 0;
787 scode = SPU_GT;
788 break;
789 case GEU:
790 reverse_compare = 1;
791 reverse_test = 1;
792 scode = SPU_GTU;
793 break;
794 case LEU:
795 reverse_compare = 0;
796 reverse_test = 1;
797 scode = SPU_GTU;
798 break;
799 case LTU:
800 reverse_compare = 1;
801 reverse_test = 0;
802 scode = SPU_GTU;
803 break;
804 case NE:
805 reverse_compare = 0;
806 reverse_test = 1;
807 scode = SPU_EQ;
808 break;
810 case EQ:
811 scode = SPU_EQ;
812 break;
813 case GT:
814 scode = SPU_GT;
815 break;
816 case GTU:
817 scode = SPU_GTU;
818 break;
819 default:
820 scode = SPU_EQ;
821 break;
824 switch (op_mode)
826 case QImode:
827 index = 0;
828 comp_mode = QImode;
829 break;
830 case HImode:
831 index = 1;
832 comp_mode = HImode;
833 break;
834 case SImode:
835 index = 2;
836 break;
837 case DImode:
838 index = 3;
839 break;
840 case TImode:
841 index = 4;
842 break;
843 case SFmode:
844 index = 5;
845 break;
846 case DFmode:
847 index = 6;
848 break;
849 case V16QImode:
850 index = 7;
851 comp_mode = op_mode;
852 break;
853 case V8HImode:
854 index = 8;
855 comp_mode = op_mode;
856 break;
857 case V4SImode:
858 index = 9;
859 comp_mode = op_mode;
860 break;
861 case V4SFmode:
862 index = 10;
863 comp_mode = V4SImode;
864 break;
865 case V2DFmode:
866 index = 11;
867 comp_mode = V2DImode;
868 break;
869 case V2DImode:
870 default:
871 abort ();
874 if (GET_MODE (op1) == DFmode
875 && (scode != SPU_GT && scode != SPU_EQ))
876 abort ();
878 if (is_set == 0 && op1 == const0_rtx
879 && (GET_MODE (op0) == SImode
880 || GET_MODE (op0) == HImode
881 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
883 /* Don't need to set a register with the result when we are
884 comparing against zero and branching. */
885 reverse_test = !reverse_test;
886 compare_result = op0;
888 else
890 compare_result = gen_reg_rtx (comp_mode);
892 if (reverse_compare)
894 rtx t = op1;
895 op1 = op0;
896 op0 = t;
899 if (spu_comp_icode[index][scode] == 0)
900 abort ();
902 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
903 (op0, op_mode))
904 op0 = force_reg (op_mode, op0);
905 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
906 (op1, op_mode))
907 op1 = force_reg (op_mode, op1);
908 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
909 op0, op1);
910 if (comp_rtx == 0)
911 abort ();
912 emit_insn (comp_rtx);
914 if (eq_test)
916 eq_result = gen_reg_rtx (comp_mode);
917 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
918 op0, op1);
919 if (eq_rtx == 0)
920 abort ();
921 emit_insn (eq_rtx);
922 ior_code = optab_handler (ior_optab, comp_mode);
923 gcc_assert (ior_code != CODE_FOR_nothing);
924 emit_insn (GEN_FCN (ior_code)
925 (compare_result, compare_result, eq_result));
929 if (is_set == 0)
931 rtx bcomp;
932 rtx loc_ref;
934 /* We don't have branch on QI compare insns, so we convert the
935 QI compare result to a HI result. */
936 if (comp_mode == QImode)
938 rtx old_res = compare_result;
939 compare_result = gen_reg_rtx (HImode);
940 comp_mode = HImode;
941 emit_insn (gen_extendqihi2 (compare_result, old_res));
944 if (reverse_test)
945 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
946 else
947 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
949 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
950 emit_jump_insn (gen_rtx_SET (pc_rtx,
951 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
952 loc_ref, pc_rtx)));
954 else if (is_set == 2)
956 rtx target = operands[0];
957 int compare_size = GET_MODE_BITSIZE (comp_mode);
958 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
959 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
960 rtx select_mask;
961 rtx op_t = operands[2];
962 rtx op_f = operands[3];
964 /* The result of the comparison can be SI, HI or QI mode. Create a
965 mask based on that result. */
966 if (target_size > compare_size)
968 select_mask = gen_reg_rtx (mode);
969 emit_insn (gen_extend_compare (select_mask, compare_result));
971 else if (target_size < compare_size)
972 select_mask =
973 gen_rtx_SUBREG (mode, compare_result,
974 (compare_size - target_size) / BITS_PER_UNIT);
975 else if (comp_mode != mode)
976 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
977 else
978 select_mask = compare_result;
980 if (GET_MODE (target) != GET_MODE (op_t)
981 || GET_MODE (target) != GET_MODE (op_f))
982 abort ();
984 if (reverse_test)
985 emit_insn (gen_selb (target, op_t, op_f, select_mask));
986 else
987 emit_insn (gen_selb (target, op_f, op_t, select_mask));
989 else
991 rtx target = operands[0];
992 if (reverse_test)
993 emit_insn (gen_rtx_SET (compare_result,
994 gen_rtx_NOT (comp_mode, compare_result)));
995 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
996 emit_insn (gen_extendhisi2 (target, compare_result));
997 else if (GET_MODE (target) == SImode
998 && GET_MODE (compare_result) == QImode)
999 emit_insn (gen_extend_compare (target, compare_result));
1000 else
1001 emit_move_insn (target, compare_result);
1005 HOST_WIDE_INT
1006 const_double_to_hwint (rtx x)
1008 HOST_WIDE_INT val;
1009 REAL_VALUE_TYPE rv;
1010 if (GET_MODE (x) == SFmode)
1012 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1013 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1015 else if (GET_MODE (x) == DFmode)
1017 long l[2];
1018 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1019 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1020 val = l[0];
1021 val = (val << 32) | (l[1] & 0xffffffff);
1023 else
1024 abort ();
1025 return val;
1029 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1031 long tv[2];
1032 REAL_VALUE_TYPE rv;
1033 gcc_assert (mode == SFmode || mode == DFmode);
1035 if (mode == SFmode)
1036 tv[0] = (v << 32) >> 32;
1037 else if (mode == DFmode)
1039 tv[1] = (v << 32) >> 32;
1040 tv[0] = v >> 32;
1042 real_from_target (&rv, tv, mode);
1043 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
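/* For example, for SFmode the bit pattern 0x3f800000 round-trips to the
   real constant 1.0f and back; for DFmode the two 32-bit target words
   are packed into a single 64-bit HOST_WIDE_INT, high word first.  */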
1046 void
1047 print_operand_address (FILE * file, register rtx addr)
1049 rtx reg;
1050 rtx offset;
1052 if (GET_CODE (addr) == AND
1053 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1054 && INTVAL (XEXP (addr, 1)) == -16)
1055 addr = XEXP (addr, 0);
1057 switch (GET_CODE (addr))
1059 case REG:
1060 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1061 break;
1063 case PLUS:
1064 reg = XEXP (addr, 0);
1065 offset = XEXP (addr, 1);
1066 if (GET_CODE (offset) == REG)
1068 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1069 reg_names[REGNO (offset)]);
1071 else if (GET_CODE (offset) == CONST_INT)
1073 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1074 INTVAL (offset), reg_names[REGNO (reg)]);
1076 else
1077 abort ();
1078 break;
1080 case CONST:
1081 case LABEL_REF:
1082 case SYMBOL_REF:
1083 case CONST_INT:
1084 output_addr_const (file, addr);
1085 break;
1087 default:
1088 debug_rtx (addr);
1089 abort ();
1093 void
1094 print_operand (FILE * file, rtx x, int code)
1096 machine_mode mode = GET_MODE (x);
1097 HOST_WIDE_INT val;
1098 unsigned char arr[16];
1099 int xcode = GET_CODE (x);
1100 int i, info;
1101 if (GET_MODE (x) == VOIDmode)
1102 switch (code)
1104 case 'L': /* 128 bits, signed */
1105 case 'm': /* 128 bits, signed */
1106 case 'T': /* 128 bits, signed */
1107 case 't': /* 128 bits, signed */
1108 mode = TImode;
1109 break;
1110 case 'K': /* 64 bits, signed */
1111 case 'k': /* 64 bits, signed */
1112 case 'D': /* 64 bits, signed */
1113 case 'd': /* 64 bits, signed */
1114 mode = DImode;
1115 break;
1116 case 'J': /* 32 bits, signed */
1117 case 'j': /* 32 bits, signed */
1118 case 's': /* 32 bits, signed */
1119 case 'S': /* 32 bits, signed */
1120 mode = SImode;
1121 break;
1123 switch (code)
1126 case 'j': /* 32 bits, signed */
1127 case 'k': /* 64 bits, signed */
1128 case 'm': /* 128 bits, signed */
1129 if (xcode == CONST_INT
1130 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1132 gcc_assert (logical_immediate_p (x, mode));
1133 constant_to_array (mode, x, arr);
1134 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1135 val = trunc_int_for_mode (val, SImode);
1136 switch (which_logical_immediate (val))
1138 case SPU_ORI:
1139 break;
1140 case SPU_ORHI:
1141 fprintf (file, "h");
1142 break;
1143 case SPU_ORBI:
1144 fprintf (file, "b");
1145 break;
1146 default:
1147 gcc_unreachable();
1150 else
1151 gcc_unreachable();
1152 return;
1154 case 'J': /* 32 bits, signed */
1155 case 'K': /* 64 bits, signed */
1156 case 'L': /* 128 bits, signed */
1157 if (xcode == CONST_INT
1158 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1160 gcc_assert (logical_immediate_p (x, mode)
1161 || iohl_immediate_p (x, mode));
1162 constant_to_array (mode, x, arr);
1163 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1164 val = trunc_int_for_mode (val, SImode);
1165 switch (which_logical_immediate (val))
1167 case SPU_ORI:
1168 case SPU_IOHL:
1169 break;
1170 case SPU_ORHI:
1171 val = trunc_int_for_mode (val, HImode);
1172 break;
1173 case SPU_ORBI:
1174 val = trunc_int_for_mode (val, QImode);
1175 break;
1176 default:
1177 gcc_unreachable();
1179 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1181 else
1182 gcc_unreachable();
1183 return;
1185 case 't': /* 128 bits, signed */
1186 case 'd': /* 64 bits, signed */
1187 case 's': /* 32 bits, signed */
1188 if (CONSTANT_P (x))
1190 enum immediate_class c = classify_immediate (x, mode);
1191 switch (c)
1193 case IC_IL1:
1194 constant_to_array (mode, x, arr);
1195 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1196 val = trunc_int_for_mode (val, SImode);
1197 switch (which_immediate_load (val))
1199 case SPU_IL:
1200 break;
1201 case SPU_ILA:
1202 fprintf (file, "a");
1203 break;
1204 case SPU_ILH:
1205 fprintf (file, "h");
1206 break;
1207 case SPU_ILHU:
1208 fprintf (file, "hu");
1209 break;
1210 default:
1211 gcc_unreachable ();
1213 break;
1214 case IC_CPAT:
1215 constant_to_array (mode, x, arr);
1216 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1217 if (info == 1)
1218 fprintf (file, "b");
1219 else if (info == 2)
1220 fprintf (file, "h");
1221 else if (info == 4)
1222 fprintf (file, "w");
1223 else if (info == 8)
1224 fprintf (file, "d");
1225 break;
1226 case IC_IL1s:
1227 if (xcode == CONST_VECTOR)
1229 x = CONST_VECTOR_ELT (x, 0);
1230 xcode = GET_CODE (x);
1232 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1233 fprintf (file, "a");
1234 else if (xcode == HIGH)
1235 fprintf (file, "hu");
1236 break;
1237 case IC_FSMBI:
1238 case IC_FSMBI2:
1239 case IC_IL2:
1240 case IC_IL2s:
1241 case IC_POOL:
1242 abort ();
1245 else
1246 gcc_unreachable ();
1247 return;
1249 case 'T': /* 128 bits, signed */
1250 case 'D': /* 64 bits, signed */
1251 case 'S': /* 32 bits, signed */
1252 if (CONSTANT_P (x))
1254 enum immediate_class c = classify_immediate (x, mode);
1255 switch (c)
1257 case IC_IL1:
1258 constant_to_array (mode, x, arr);
1259 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1260 val = trunc_int_for_mode (val, SImode);
1261 switch (which_immediate_load (val))
1263 case SPU_IL:
1264 case SPU_ILA:
1265 break;
1266 case SPU_ILH:
1267 case SPU_ILHU:
1268 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1269 break;
1270 default:
1271 gcc_unreachable ();
1273 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1274 break;
1275 case IC_FSMBI:
1276 constant_to_array (mode, x, arr);
1277 val = 0;
1278 for (i = 0; i < 16; i++)
1280 val <<= 1;
1281 val |= arr[i] & 1;
1283 print_operand (file, GEN_INT (val), 0);
1284 break;
1285 case IC_CPAT:
1286 constant_to_array (mode, x, arr);
1287 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1288 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1289 break;
1290 case IC_IL1s:
1291 if (xcode == HIGH)
1292 x = XEXP (x, 0);
1293 if (GET_CODE (x) == CONST_VECTOR)
1294 x = CONST_VECTOR_ELT (x, 0);
1295 output_addr_const (file, x);
1296 if (xcode == HIGH)
1297 fprintf (file, "@h");
1298 break;
1299 case IC_IL2:
1300 case IC_IL2s:
1301 case IC_FSMBI2:
1302 case IC_POOL:
1303 abort ();
1306 else
1307 gcc_unreachable ();
1308 return;
1310 case 'C':
1311 if (xcode == CONST_INT)
1313 /* Only the 4 least significant bits are relevant for generating
1314 control word instructions. */
1315 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1316 return;
1318 break;
1320 case 'M': /* print code for c*d */
1321 if (GET_CODE (x) == CONST_INT)
1322 switch (INTVAL (x))
1324 case 1:
1325 fprintf (file, "b");
1326 break;
1327 case 2:
1328 fprintf (file, "h");
1329 break;
1330 case 4:
1331 fprintf (file, "w");
1332 break;
1333 case 8:
1334 fprintf (file, "d");
1335 break;
1336 default:
1337 gcc_unreachable();
1339 else
1340 gcc_unreachable();
1341 return;
1343 case 'N': /* Negate the operand */
1344 if (xcode == CONST_INT)
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1346 else if (xcode == CONST_VECTOR)
1347 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1348 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1349 return;
1351 case 'I': /* enable/disable interrupts */
1352 if (xcode == CONST_INT)
1353 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1354 return;
1356 case 'b': /* branch modifiers */
1357 if (xcode == REG)
1358 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1359 else if (COMPARISON_P (x))
1360 fprintf (file, "%s", xcode == NE ? "n" : "");
1361 return;
1363 case 'i': /* indirect call */
1364 if (xcode == MEM)
1366 if (GET_CODE (XEXP (x, 0)) == REG)
1367 /* Used in indirect function calls. */
1368 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1369 else
1370 output_address (XEXP (x, 0));
1372 return;
1374 case 'p': /* load/store */
1375 if (xcode == MEM)
1377 x = XEXP (x, 0);
1378 xcode = GET_CODE (x);
1380 if (xcode == AND)
1382 x = XEXP (x, 0);
1383 xcode = GET_CODE (x);
1385 if (xcode == REG)
1386 fprintf (file, "d");
1387 else if (xcode == CONST_INT)
1388 fprintf (file, "a");
1389 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1390 fprintf (file, "r");
1391 else if (xcode == PLUS || xcode == LO_SUM)
1393 if (GET_CODE (XEXP (x, 1)) == REG)
1394 fprintf (file, "x");
1395 else
1396 fprintf (file, "d");
1398 return;
1400 case 'e':
1401 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1402 val &= 0x7;
1403 output_addr_const (file, GEN_INT (val));
1404 return;
1406 case 'f':
1407 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1408 val &= 0x1f;
1409 output_addr_const (file, GEN_INT (val));
1410 return;
1412 case 'g':
1413 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1414 val &= 0x3f;
1415 output_addr_const (file, GEN_INT (val));
1416 return;
1418 case 'h':
1419 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1420 val = (val >> 3) & 0x1f;
1421 output_addr_const (file, GEN_INT (val));
1422 return;
1424 case 'E':
1425 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1426 val = -val;
1427 val &= 0x7;
1428 output_addr_const (file, GEN_INT (val));
1429 return;
1431 case 'F':
1432 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1433 val = -val;
1434 val &= 0x1f;
1435 output_addr_const (file, GEN_INT (val));
1436 return;
1438 case 'G':
1439 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1440 val = -val;
1441 val &= 0x3f;
1442 output_addr_const (file, GEN_INT (val));
1443 return;
1445 case 'H':
1446 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1447 val = -(val & -8ll);
1448 val = (val >> 3) & 0x1f;
1449 output_addr_const (file, GEN_INT (val));
1450 return;
1452 case 'v':
1453 case 'w':
1454 constant_to_array (mode, x, arr);
1455 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1456 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1457 return;
1459 case 0:
1460 if (xcode == REG)
1461 fprintf (file, "%s", reg_names[REGNO (x)]);
1462 else if (xcode == MEM)
1463 output_address (XEXP (x, 0));
1464 else if (xcode == CONST_VECTOR)
1465 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1466 else
1467 output_addr_const (file, x);
1468 return;
1470 /* unused letters
1471 o qr u yz
1472 AB OPQR UVWXYZ */
1473 default:
1474 output_operand_lossage ("invalid %%xn code");
1476 gcc_unreachable ();
1479 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1480 caller saved register. For leaf functions it is more efficient to
1481 use a volatile register because we won't need to save and restore the
1482 pic register. This routine is only valid after register allocation
1483 is completed, so we can pick an unused register. */
1484 static rtx
1485 get_pic_reg (void)
1487 if (!reload_completed && !reload_in_progress)
1488 abort ();
1490 /* If we've already made the decision, we need to keep with it. Once we've
1491 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1492 return true since the register is now live; this should not cause us to
1493 "switch back" to using pic_offset_table_rtx. */
1494 if (!cfun->machine->pic_reg)
1496 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1497 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1498 else
1499 cfun->machine->pic_reg = pic_offset_table_rtx;
1502 return cfun->machine->pic_reg;
1505 /* Split constant addresses to handle cases that are too large.
1506 Add in the pic register when in PIC mode.
1507 Split immediates that require more than 1 instruction. */
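/* For example, an SImode constant such as 0x12345678 cannot be loaded
   with a single il/ila/ilh/ilhu instruction, so the IC_IL2 case below
   splits it into a move of the high halfwords (ilhu) followed by an
   IOR of the low halfwords (iohl).  */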
1509 spu_split_immediate (rtx * ops)
1511 machine_mode mode = GET_MODE (ops[0]);
1512 enum immediate_class c = classify_immediate (ops[1], mode);
1514 switch (c)
1516 case IC_IL2:
1518 unsigned char arrhi[16];
1519 unsigned char arrlo[16];
1520 rtx to, temp, hi, lo;
1521 int i;
1522 machine_mode imode = mode;
1523 /* We need to do reals as ints because the constant used in the
1524 IOR might not be a legitimate real constant. */
1525 imode = int_mode_for_mode (mode);
1526 constant_to_array (mode, ops[1], arrhi);
1527 if (imode != mode)
1528 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1529 else
1530 to = ops[0];
1531 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1532 for (i = 0; i < 16; i += 4)
1534 arrlo[i + 2] = arrhi[i + 2];
1535 arrlo[i + 3] = arrhi[i + 3];
1536 arrlo[i + 0] = arrlo[i + 1] = 0;
1537 arrhi[i + 2] = arrhi[i + 3] = 0;
1539 hi = array_to_constant (imode, arrhi);
1540 lo = array_to_constant (imode, arrlo);
1541 emit_move_insn (temp, hi);
1542 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1543 return 1;
1545 case IC_FSMBI2:
1547 unsigned char arr_fsmbi[16];
1548 unsigned char arr_andbi[16];
1549 rtx to, reg_fsmbi, reg_and;
1550 int i;
1551 machine_mode imode = mode;
1552 /* We need to do reals as ints because the constant used in the
1553 * AND might not be a legitimate real constant. */
1554 imode = int_mode_for_mode (mode);
1555 constant_to_array (mode, ops[1], arr_fsmbi);
1556 if (imode != mode)
1557 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1558 else
1559 to = ops[0];
1560 for (i = 0; i < 16; i++)
1561 if (arr_fsmbi[i] != 0)
1563 arr_andbi[0] = arr_fsmbi[i];
1564 arr_fsmbi[i] = 0xff;
1566 for (i = 1; i < 16; i++)
1567 arr_andbi[i] = arr_andbi[0];
1568 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1569 reg_and = array_to_constant (imode, arr_andbi);
1570 emit_move_insn (to, reg_fsmbi);
1571 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1572 return 1;
1574 case IC_POOL:
1575 if (reload_in_progress || reload_completed)
1577 rtx mem = force_const_mem (mode, ops[1]);
1578 if (TARGET_LARGE_MEM)
1580 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1581 emit_move_insn (addr, XEXP (mem, 0));
1582 mem = replace_equiv_address (mem, addr);
1584 emit_move_insn (ops[0], mem);
1585 return 1;
1587 break;
1588 case IC_IL1s:
1589 case IC_IL2s:
1590 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1592 if (c == IC_IL2s)
1594 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1595 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1597 else if (flag_pic)
1598 emit_insn (gen_pic (ops[0], ops[1]));
1599 if (flag_pic)
1601 rtx pic_reg = get_pic_reg ();
1602 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1604 return flag_pic || c == IC_IL2s;
1606 break;
1607 case IC_IL1:
1608 case IC_FSMBI:
1609 case IC_CPAT:
1610 break;
1612 return 0;
1615 /* SAVING is TRUE when we are generating the actual load and store
1616 instructions for REGNO. When determining the size of the stack
1617 needed for saving registers we must allocate enough space for the
1618 worst case, because we don't always have the information early enough
1619 to not allocate it. But we can at least eliminate the actual loads
1620 and stores during the prologue/epilogue. */
1621 static int
1622 need_to_save_reg (int regno, int saving)
1624 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1625 return 1;
1626 if (flag_pic
1627 && regno == PIC_OFFSET_TABLE_REGNUM
1628 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1629 return 1;
1630 return 0;
1633 /* This function is only correct starting with local register
1634 allocation */
1636 spu_saved_regs_size (void)
1638 int reg_save_size = 0;
1639 int regno;
1641 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1642 if (need_to_save_reg (regno, 0))
1643 reg_save_size += 0x10;
1644 return reg_save_size;
1647 static rtx_insn *
1648 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1650 rtx reg = gen_rtx_REG (V4SImode, regno);
1651 rtx mem =
1652 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1653 return emit_insn (gen_movv4si (mem, reg));
1656 static rtx_insn *
1657 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1659 rtx reg = gen_rtx_REG (V4SImode, regno);
1660 rtx mem =
1661 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1662 return emit_insn (gen_movv4si (reg, mem));
1665 /* This happens after reload, so we need to expand it. */
1666 static rtx_insn *
1667 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1669 rtx_insn *insn;
1670 if (satisfies_constraint_K (GEN_INT (imm)))
1672 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1674 else
1676 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1677 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1678 if (REGNO (src) == REGNO (scratch))
1679 abort ();
1681 return insn;
1684 /* Return nonzero if this function is known to have a null epilogue. */
1687 direct_return (void)
1689 if (reload_completed)
1691 if (cfun->static_chain_decl == 0
1692 && (spu_saved_regs_size ()
1693 + get_frame_size ()
1694 + crtl->outgoing_args_size
1695 + crtl->args.pretend_args_size == 0)
1696 && crtl->is_leaf)
1697 return 1;
1699 return 0;
1703 The stack frame looks like this:
1704 +-------------+
1705 | incoming |
1706 | args |
1707 AP -> +-------------+
1708 | $lr save |
1709 +-------------+
1710 prev SP | back chain |
1711 +-------------+
1712 | var args |
1713 | reg save | crtl->args.pretend_args_size bytes
1714 +-------------+
1715 | ... |
1716 | saved regs | spu_saved_regs_size() bytes
1717 FP -> +-------------+
1718 | ... |
1719 | vars | get_frame_size() bytes
1720 HFP -> +-------------+
1721 | ... |
1722 | outgoing |
1723 | args | crtl->outgoing_args_size bytes
1724 +-------------+
1725 | $lr of next |
1726 | frame |
1727 +-------------+
1728 | back chain |
1729 SP -> +-------------+
1732 void
1733 spu_expand_prologue (void)
1735 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1736 HOST_WIDE_INT total_size;
1737 HOST_WIDE_INT saved_regs_size;
1738 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1739 rtx scratch_reg_0, scratch_reg_1;
1740 rtx_insn *insn;
1741 rtx real;
1743 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1744 cfun->machine->pic_reg = pic_offset_table_rtx;
1746 if (spu_naked_function_p (current_function_decl))
1747 return;
1749 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1750 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1752 saved_regs_size = spu_saved_regs_size ();
1753 total_size = size + saved_regs_size
1754 + crtl->outgoing_args_size
1755 + crtl->args.pretend_args_size;
1757 if (!crtl->is_leaf
1758 || cfun->calls_alloca || total_size > 0)
1759 total_size += STACK_POINTER_OFFSET;
1761 /* Save this first because code after this might use the link
1762 register as a scratch register. */
1763 if (!crtl->is_leaf)
1765 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1766 RTX_FRAME_RELATED_P (insn) = 1;
1769 if (total_size > 0)
1771 offset = -crtl->args.pretend_args_size;
1772 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1773 if (need_to_save_reg (regno, 1))
1775 offset -= 16;
1776 insn = frame_emit_store (regno, sp_reg, offset);
1777 RTX_FRAME_RELATED_P (insn) = 1;
1781 if (flag_pic && cfun->machine->pic_reg)
1783 rtx pic_reg = cfun->machine->pic_reg;
1784 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1785 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1788 if (total_size > 0)
1790 if (flag_stack_check)
1792 /* We compare against total_size-1 because
1793 ($sp >= total_size) <=> ($sp > total_size-1) */
1794 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1795 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1796 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1797 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1799 emit_move_insn (scratch_v4si, size_v4si);
1800 size_v4si = scratch_v4si;
1802 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1803 emit_insn (gen_vec_extractv4si
1804 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1805 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1808 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1809 the value of the previous $sp because we save it as the back
1810 chain. */
1811 if (total_size <= 2000)
1813 /* In this case we save the back chain first. */
1814 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1815 insn =
1816 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1818 else
1820 insn = emit_move_insn (scratch_reg_0, sp_reg);
1821 insn =
1822 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1824 RTX_FRAME_RELATED_P (insn) = 1;
1825 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1826 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1828 if (total_size > 2000)
1830 /* Save the back chain ptr */
1831 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1834 if (frame_pointer_needed)
1836 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1837 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1838 + crtl->outgoing_args_size;
1839 /* Set the new frame_pointer */
1840 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1841 RTX_FRAME_RELATED_P (insn) = 1;
1842 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1843 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1844 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1848 if (flag_stack_usage_info)
1849 current_function_static_stack_size = total_size;
1852 void
1853 spu_expand_epilogue (bool sibcall_p)
1855 int size = get_frame_size (), offset, regno;
1856 HOST_WIDE_INT saved_regs_size, total_size;
1857 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1858 rtx scratch_reg_0;
1860 if (spu_naked_function_p (current_function_decl))
1861 return;
1863 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1865 saved_regs_size = spu_saved_regs_size ();
1866 total_size = size + saved_regs_size
1867 + crtl->outgoing_args_size
1868 + crtl->args.pretend_args_size;
1870 if (!crtl->is_leaf
1871 || cfun->calls_alloca || total_size > 0)
1872 total_size += STACK_POINTER_OFFSET;
1874 if (total_size > 0)
1876 if (cfun->calls_alloca)
1877 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1878 else
1879 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1882 if (saved_regs_size > 0)
1884 offset = -crtl->args.pretend_args_size;
1885 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1886 if (need_to_save_reg (regno, 1))
1888 offset -= 0x10;
1889 frame_emit_load (regno, sp_reg, offset);
1894 if (!crtl->is_leaf)
1895 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1897 if (!sibcall_p)
1899 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1900 emit_jump_insn (gen__return ());
1905 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1907 if (count != 0)
1908 return 0;
1909 /* This is inefficient because it ends up copying to a save-register
1910 which then gets saved even though $lr has already been saved. But
1911 it does generate better code for leaf functions and we don't need
1912 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1913 used for __builtin_return_address anyway, so maybe we don't care if
1914 it's inefficient. */
1915 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1919 /* Given VAL, generate a constant appropriate for MODE.
1920 If MODE is a vector mode, every element will be VAL.
1921 For TImode, VAL will be zero extended to 128 bits. */
1923 spu_const (machine_mode mode, HOST_WIDE_INT val)
1925 rtx inner;
1926 rtvec v;
1927 int units, i;
1929 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1930 || GET_MODE_CLASS (mode) == MODE_FLOAT
1931 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1932 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1934 if (GET_MODE_CLASS (mode) == MODE_INT)
1935 return immed_double_const (val, 0, mode);
1937 /* val is the bit representation of the float */
1938 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1939 return hwint_to_const_double (mode, val);
1941 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1942 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1943 else
1944 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1946 units = GET_MODE_NUNITS (mode);
1948 v = rtvec_alloc (units);
1950 for (i = 0; i < units; ++i)
1951 RTVEC_ELT (v, i) = inner;
1953 return gen_rtx_CONST_VECTOR (mode, v);
1956 /* Create a MODE vector constant from 4 ints. */
1958 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1960 unsigned char arr[16];
1961 arr[0] = (a >> 24) & 0xff;
1962 arr[1] = (a >> 16) & 0xff;
1963 arr[2] = (a >> 8) & 0xff;
1964 arr[3] = (a >> 0) & 0xff;
1965 arr[4] = (b >> 24) & 0xff;
1966 arr[5] = (b >> 16) & 0xff;
1967 arr[6] = (b >> 8) & 0xff;
1968 arr[7] = (b >> 0) & 0xff;
1969 arr[8] = (c >> 24) & 0xff;
1970 arr[9] = (c >> 16) & 0xff;
1971 arr[10] = (c >> 8) & 0xff;
1972 arr[11] = (c >> 0) & 0xff;
1973 arr[12] = (d >> 24) & 0xff;
1974 arr[13] = (d >> 16) & 0xff;
1975 arr[14] = (d >> 8) & 0xff;
1976 arr[15] = (d >> 0) & 0xff;
1977 return array_to_constant(mode, arr);
1980 /* branch hint stuff */
1982 /* An array of these is used to propagate hints to predecessor blocks. */
1983 struct spu_bb_info
1985 rtx_insn *prop_jump; /* propagated from another block */
1986 int bb_index; /* the original block. */
1988 static struct spu_bb_info *spu_bb_info;
1990 #define STOP_HINT_P(INSN) \
1991 (CALL_P(INSN) \
1992 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1993 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1995 /* 1 when RTX is a hinted branch or its target. We keep track of
1996 what has been hinted so the safe-hint code can test it easily. */
1997 #define HINTED_P(RTX) \
1998 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2000 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2001 #define SCHED_ON_EVEN_P(RTX) \
2002 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2004 /* Emit a nop for INSN such that the two will dual issue. This assumes
2005 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2006 We check for TImode to handle a MULTI1 insn which has dual issued its
2007 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
2008 static void
2009 emit_nop_for_insn (rtx_insn *insn)
2011 int p;
2012 rtx_insn *new_insn;
2014 /* We need to handle JUMP_TABLE_DATA separately. */
2015 if (JUMP_TABLE_DATA_P (insn))
2017 new_insn = emit_insn_after (gen_lnop(), insn);
2018 recog_memoized (new_insn);
2019 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2020 return;
2023 p = get_pipe (insn);
2024 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2025 new_insn = emit_insn_after (gen_lnop (), insn);
2026 else if (p == 1 && GET_MODE (insn) == TImode)
2028 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2029 PUT_MODE (new_insn, TImode);
2030 PUT_MODE (insn, VOIDmode);
2032 else
2033 new_insn = emit_insn_after (gen_lnop (), insn);
2034 recog_memoized (new_insn);
2035 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2038 /* Insert nops in basic blocks to meet dual issue alignment
2039 requirements. Also make sure hbrp and hint instructions are at least
2040 one cycle apart, possibly inserting a nop. */
2041 static void
2042 pad_bb(void)
2044 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2045 int length;
2046 int addr;
2048 /* This sets up INSN_ADDRESSES. */
2049 shorten_branches (get_insns ());
2051 /* Keep track of length added by nops. */
2052 length = 0;
2054 prev_insn = 0;
2055 insn = get_insns ();
2056 if (!active_insn_p (insn))
2057 insn = next_active_insn (insn);
2058 for (; insn; insn = next_insn)
2060 next_insn = next_active_insn (insn);
2061 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2062 || INSN_CODE (insn) == CODE_FOR_hbr)
2064 if (hbr_insn)
2066 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2067 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2068 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2069 || (a1 - a0 == 4))
2071 prev_insn = emit_insn_before (gen_lnop (), insn);
2072 PUT_MODE (prev_insn, GET_MODE (insn));
2073 PUT_MODE (insn, TImode);
2074 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2075 length += 4;
2078 hbr_insn = insn;
2080 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2082 if (GET_MODE (insn) == TImode)
2083 PUT_MODE (next_insn, TImode);
2084 insn = next_insn;
2085 next_insn = next_active_insn (insn);
2087 addr = INSN_ADDRESSES (INSN_UID (insn));
2088 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2090 if (((addr + length) & 7) != 0)
2092 emit_nop_for_insn (prev_insn);
2093 length += 4;
2096 else if (GET_MODE (insn) == TImode
2097 && ((next_insn && GET_MODE (next_insn) != TImode)
2098 || get_attr_type (insn) == TYPE_MULTI0)
2099 && ((addr + length) & 7) != 0)
2101 /* prev_insn will always be set because the first insn is
2102 always 8-byte aligned. */
2103 emit_nop_for_insn (prev_insn);
2104 length += 4;
2106 prev_insn = insn;
2111 /* Routines for branch hints. */
2113 static void
2114 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2115 int distance, sbitmap blocks)
2117 rtx branch_label = 0;
2118 rtx_insn *hint;
2119 rtx_insn *insn;
2120 rtx_jump_table_data *table;
2122 if (before == 0 || branch == 0 || target == 0)
2123 return;
2125 /* While scheduling we require hints to be no further than 600 bytes
2126 away, so we need to enforce that here too. */
2127 if (distance > 600)
2128 return;
2131 /* If BEFORE is a basic block note, emit the hint after the note. */
2131 if (NOTE_INSN_BASIC_BLOCK_P (before))
2132 before = NEXT_INSN (before);
2134 branch_label = gen_label_rtx ();
2135 LABEL_NUSES (branch_label)++;
2136 LABEL_PRESERVE_P (branch_label) = 1;
2137 insn = emit_label_before (branch_label, branch);
2138 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2139 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2141 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2142 recog_memoized (hint);
2143 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2144 HINTED_P (branch) = 1;
2146 if (GET_CODE (target) == LABEL_REF)
2147 HINTED_P (XEXP (target, 0)) = 1;
2148 else if (tablejump_p (branch, 0, &table))
2150 rtvec vec;
2151 int j;
2152 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2153 vec = XVEC (PATTERN (table), 0);
2154 else
2155 vec = XVEC (PATTERN (table), 1);
2156 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2157 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2160 if (distance >= 588)
2162 /* Make sure the hint isn't scheduled any earlier than this point,
2163 which could make it too far for the branch offset to fit. */
2164 insn = emit_insn_before (gen_blockage (), hint);
2165 recog_memoized (insn);
2166 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2168 else if (distance <= 8 * 4)
2170 /* To guarantee at least 8 insns between the hint and branch we
2171 insert nops. */
2172 int d;
2173 for (d = distance; d < 8 * 4; d += 4)
2175 insn =
2176 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2177 recog_memoized (insn);
2178 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2181 /* Make sure any nops inserted aren't scheduled before the hint. */
2182 insn = emit_insn_after (gen_blockage (), hint);
2183 recog_memoized (insn);
2184 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2186 /* Make sure any nops inserted aren't scheduled after the call. */
2187 if (CALL_P (branch) && distance < 8 * 4)
2189 insn = emit_insn_before (gen_blockage (), branch);
2190 recog_memoized (insn);
2191 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2196 /* Return 0 if we don't want a hint for this branch. Otherwise return
2197 the rtx for the branch target. */
2198 static rtx
2199 get_branch_target (rtx_insn *branch)
2201 if (JUMP_P (branch))
2203 rtx set, src;
2205 /* Return statements */
2206 if (GET_CODE (PATTERN (branch)) == RETURN)
2207 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2209 /* ASM GOTOs. */
2210 if (extract_asm_operands (PATTERN (branch)) != NULL)
2211 return NULL;
2213 set = single_set (branch);
2214 src = SET_SRC (set);
2215 if (GET_CODE (SET_DEST (set)) != PC)
2216 abort ();
2218 if (GET_CODE (src) == IF_THEN_ELSE)
2220 rtx lab = 0;
2221 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2222 if (note)
2224 /* If the more probable case is not a fall through, then
2225 try a branch hint. */
2226 int prob = XINT (note, 0);
2227 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2228 && GET_CODE (XEXP (src, 1)) != PC)
2229 lab = XEXP (src, 1);
2230 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2231 && GET_CODE (XEXP (src, 2)) != PC)
2232 lab = XEXP (src, 2);
2234 if (lab)
2236 if (GET_CODE (lab) == RETURN)
2237 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2238 return lab;
2240 return 0;
2243 return src;
2245 else if (CALL_P (branch))
2247 rtx call;
2248 /* All of our call patterns are in a PARALLEL and the CALL is
2249 the first pattern in the PARALLEL. */
2250 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2251 abort ();
2252 call = XVECEXP (PATTERN (branch), 0, 0);
2253 if (GET_CODE (call) == SET)
2254 call = SET_SRC (call);
2255 if (GET_CODE (call) != CALL)
2256 abort ();
2257 return XEXP (XEXP (call, 0), 0);
2259 return 0;
2262 /* The special $hbr register is used to prevent the insn scheduler from
2263 moving hbr insns across instructions which invalidate them. It
2264 should only be used in a clobber, and this function searches for
2265 insns which clobber it. */
2266 static bool
2267 insn_clobbers_hbr (rtx_insn *insn)
2269 if (INSN_P (insn)
2270 && GET_CODE (PATTERN (insn)) == PARALLEL)
2272 rtx parallel = PATTERN (insn);
2273 rtx clobber;
2274 int j;
2275 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2277 clobber = XVECEXP (parallel, 0, j);
2278 if (GET_CODE (clobber) == CLOBBER
2279 && GET_CODE (XEXP (clobber, 0)) == REG
2280 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2281 return 1;
2284 return 0;
2287 /* Search up to 32 insns starting at FIRST:
2288 - at any kind of hinted branch, just return
2289 - at any unconditional branch in the first 15 insns, just return
2290 - at a call or indirect branch, after the first 15 insns, force it to
2291 an even address and return
2292 - at any unconditional branch, after the first 15 insns, force it to
2293 an even address.
2294 At the end of the search, insert an hbrp within 4 insns of FIRST,
2295 and an hbrp within 16 instructions of FIRST.
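/* The "hbrp" mentioned above is the prefetch form of the branch hint
   instruction, emitted here through gen_iprefetch; instead of hinting a
   particular branch it prefetches code so the instruction line buffer
   does not run dry, which is the runout this function guards against.  */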
2297 static void
2298 insert_hbrp_for_ilb_runout (rtx_insn *first)
2300 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2301 int addr = 0, length, first_addr = -1;
2302 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2303 int insert_lnop_after = 0;
2304 for (insn = first; insn; insn = NEXT_INSN (insn))
2305 if (INSN_P (insn))
2307 if (first_addr == -1)
2308 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2309 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2310 length = get_attr_length (insn);
2312 if (before_4 == 0 && addr + length >= 4 * 4)
2313 before_4 = insn;
2314 /* We test for 14 instructions because the first hbrp will add
2315 up to 2 instructions. */
2316 if (before_16 == 0 && addr + length >= 14 * 4)
2317 before_16 = insn;
2319 if (INSN_CODE (insn) == CODE_FOR_hbr)
2321 /* Make sure an hbrp is at least 2 cycles away from a hint.
2322 Insert an lnop after the hbrp when necessary. */
2323 if (before_4 == 0 && addr > 0)
2325 before_4 = insn;
2326 insert_lnop_after |= 1;
2328 else if (before_4 && addr <= 4 * 4)
2329 insert_lnop_after |= 1;
2330 if (before_16 == 0 && addr > 10 * 4)
2332 before_16 = insn;
2333 insert_lnop_after |= 2;
2335 else if (before_16 && addr <= 14 * 4)
2336 insert_lnop_after |= 2;
2339 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2341 if (addr < hbrp_addr0)
2342 hbrp_addr0 = addr;
2343 else if (addr < hbrp_addr1)
2344 hbrp_addr1 = addr;
2347 if (CALL_P (insn) || JUMP_P (insn))
2349 if (HINTED_P (insn))
2350 return;
2352 /* Any branch after the first 15 insns should be on an even
2353 address to avoid a special case branch. There might be
2354 some nops and/or hbrps inserted, so we test after 10
2355 insns. */
2356 if (addr > 10 * 4)
2357 SCHED_ON_EVEN_P (insn) = 1;
2360 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2361 return;
2364 if (addr + length >= 32 * 4)
2366 gcc_assert (before_4 && before_16);
2367 if (hbrp_addr0 > 4 * 4)
2369 insn =
2370 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2371 recog_memoized (insn);
2372 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2373 INSN_ADDRESSES_NEW (insn,
2374 INSN_ADDRESSES (INSN_UID (before_4)));
2375 PUT_MODE (insn, GET_MODE (before_4));
2376 PUT_MODE (before_4, TImode);
2377 if (insert_lnop_after & 1)
2379 insn = emit_insn_before (gen_lnop (), before_4);
2380 recog_memoized (insn);
2381 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2382 INSN_ADDRESSES_NEW (insn,
2383 INSN_ADDRESSES (INSN_UID (before_4)));
2384 PUT_MODE (insn, TImode);
2387 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2388 && hbrp_addr1 > 16 * 4)
2390 insn =
2391 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2392 recog_memoized (insn);
2393 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2394 INSN_ADDRESSES_NEW (insn,
2395 INSN_ADDRESSES (INSN_UID (before_16)));
2396 PUT_MODE (insn, GET_MODE (before_16));
2397 PUT_MODE (before_16, TImode);
2398 if (insert_lnop_after & 2)
2400 insn = emit_insn_before (gen_lnop (), before_16);
2401 recog_memoized (insn);
2402 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2403 INSN_ADDRESSES_NEW (insn,
2404 INSN_ADDRESSES (INSN_UID
2405 (before_16)));
2406 PUT_MODE (insn, TImode);
2409 return;
2412 else if (BARRIER_P (insn))
2413 return;
2417 /* The SPU might hang when it executes 48 inline instructions after a
2418 hinted branch jumps to its hinted target. The beginning of a
2419 function and the return from a call might have been hinted, and
2420 must be handled as well. To prevent a hang we insert 2 hbrps. The
2421 first should be within 6 insns of the branch target. The second
2422 should be within 22 insns of the branch target. When determining
2423 if hbrps are necessary, we look for only 32 inline instructions,
2424 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2425 when inserting new hbrps, we insert them within 4 and 16 insns of
2426 the target. */
2427 static void
2428 insert_hbrp (void)
2430 rtx_insn *insn;
2431 if (TARGET_SAFE_HINTS)
2433 shorten_branches (get_insns ());
2434 /* Insert hbrp at beginning of function */
2435 insn = next_active_insn (get_insns ());
2436 if (insn)
2437 insert_hbrp_for_ilb_runout (insn);
2438 /* Insert hbrp after hinted targets. */
2439 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2440 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2441 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2445 static int in_spu_reorg;
2447 static void
2448 spu_var_tracking (void)
2450 if (flag_var_tracking)
2452 df_analyze ();
2453 timevar_push (TV_VAR_TRACKING);
2454 variable_tracking_main ();
2455 timevar_pop (TV_VAR_TRACKING);
2456 df_finish_pass (false);
2460 /* Insert branch hints. There are no branch optimizations after this
2461 pass, so it's safe to set our branch hints now. */
2462 static void
2463 spu_machine_dependent_reorg (void)
2465 sbitmap blocks;
2466 basic_block bb;
2467 rtx_insn *branch, *insn;
2468 rtx branch_target = 0;
2469 int branch_addr = 0, insn_addr, required_dist = 0;
2470 int i;
2471 unsigned int j;
2473 if (!TARGET_BRANCH_HINTS || optimize == 0)
2475 /* We still do it for unoptimized code because an external
2476 function might have hinted a call or return. */
2477 compute_bb_for_insn ();
2478 insert_hbrp ();
2479 pad_bb ();
2480 spu_var_tracking ();
2481 free_bb_for_insn ();
2482 return;
2485 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2486 bitmap_clear (blocks);
2488 in_spu_reorg = 1;
2489 compute_bb_for_insn ();
2491 /* (Re-)discover loops so that bb->loop_father can be used
2492 in the analysis below. */
2493 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2495 compact_blocks ();
2497 spu_bb_info =
2498 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2499 sizeof (struct spu_bb_info));
2501 /* We need exact insn addresses and lengths. */
2502 shorten_branches (get_insns ());
2504 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2506 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2507 branch = 0;
2508 if (spu_bb_info[i].prop_jump)
2510 branch = spu_bb_info[i].prop_jump;
2511 branch_target = get_branch_target (branch);
2512 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2513 required_dist = spu_hint_dist;
2515 /* Search from end of a block to beginning. In this loop, find
2516 jumps which need a branch and emit them only when:
2517 - it's an indirect branch and we're at the insn which sets
2518 the register
2519 - we're at an insn that will invalidate the hint. e.g., a
2520 call, another hint insn, inline asm that clobbers $hbr, and
2521 some inlined operations (divmodsi4). Don't consider jumps
2522 because they are only at the end of a block and are
2523 considered when we are deciding whether to propagate
2524 - we're getting too far away from the branch. The hbr insns
2525 only have a signed 10 bit offset
2526 We go back as far as possible so the branch will be considered
2527 for propagation when we get to the beginning of the block. */
2528 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2530 if (INSN_P (insn))
2532 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2533 if (branch
2534 && ((GET_CODE (branch_target) == REG
2535 && set_of (branch_target, insn) != NULL_RTX)
2536 || insn_clobbers_hbr (insn)
2537 || branch_addr - insn_addr > 600))
2539 rtx_insn *next = NEXT_INSN (insn);
2540 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2541 if (insn != BB_END (bb)
2542 && branch_addr - next_addr >= required_dist)
2544 if (dump_file)
2545 fprintf (dump_file,
2546 "hint for %i in block %i before %i\n",
2547 INSN_UID (branch), bb->index,
2548 INSN_UID (next));
2549 spu_emit_branch_hint (next, branch, branch_target,
2550 branch_addr - next_addr, blocks);
2552 branch = 0;
2555 /* JUMP_P will only be true at the end of a block. When
2556 branch is already set it means we've previously decided
2557 to propagate a hint for that branch into this block. */
2558 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2560 branch = 0;
2561 if ((branch_target = get_branch_target (insn)))
2563 branch = insn;
2564 branch_addr = insn_addr;
2565 required_dist = spu_hint_dist;
2569 if (insn == BB_HEAD (bb))
2570 break;
2573 if (branch)
2575 /* If we haven't emitted a hint for this branch yet, it might
2576 be profitable to emit it in one of the predecessor blocks,
2577 especially for loops. */
2578 rtx_insn *bbend;
2579 basic_block prev = 0, prop = 0, prev2 = 0;
2580 int loop_exit = 0, simple_loop = 0;
2581 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2583 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2584 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2585 prev = EDGE_PRED (bb, j)->src;
2586 else
2587 prev2 = EDGE_PRED (bb, j)->src;
2589 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2590 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2591 loop_exit = 1;
2592 else if (EDGE_SUCC (bb, j)->dest == bb)
2593 simple_loop = 1;
2595 /* If this branch is a loop exit then propagate to previous
2596 fallthru block. This catches the cases when it is a simple
2597 loop or when there is an initial branch into the loop. */
2598 if (prev && (loop_exit || simple_loop)
2599 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2600 prop = prev;
2602 /* If there is only one adjacent predecessor, don't propagate
2603 outside this loop. */
2604 else if (prev && single_pred_p (bb)
2605 && prev->loop_father == bb->loop_father)
2606 prop = prev;
2608 /* If this is the JOIN block of a simple IF-THEN then
2609 propagate the hint to the HEADER block. */
2610 else if (prev && prev2
2611 && EDGE_COUNT (bb->preds) == 2
2612 && EDGE_COUNT (prev->preds) == 1
2613 && EDGE_PRED (prev, 0)->src == prev2
2614 && prev2->loop_father == bb->loop_father
2615 && GET_CODE (branch_target) != REG)
2616 prop = prev;
2618 /* Don't propagate when:
2619 - this is a simple loop and the hint would be too far
2620 - this is not a simple loop and there are 16 insns in
2621 this block already
2622 - the predecessor block ends in a branch that will be
2623 hinted
2624 - the predecessor block ends in an insn that invalidates
2625 the hint */
2626 if (prop
2627 && prop->index >= 0
2628 && (bbend = BB_END (prop))
2629 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2630 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2631 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2633 if (dump_file)
2634 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2635 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2636 bb->index, prop->index, bb_loop_depth (bb),
2637 INSN_UID (branch), loop_exit, simple_loop,
2638 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2640 spu_bb_info[prop->index].prop_jump = branch;
2641 spu_bb_info[prop->index].bb_index = i;
2643 else if (branch_addr - next_addr >= required_dist)
2645 if (dump_file)
2646 fprintf (dump_file, "hint for %i in block %i before %i\n",
2647 INSN_UID (branch), bb->index,
2648 INSN_UID (NEXT_INSN (insn)));
2649 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2650 branch_addr - next_addr, blocks);
2652 branch = 0;
2655 free (spu_bb_info);
2657 if (!bitmap_empty_p (blocks))
2658 find_many_sub_basic_blocks (blocks);
2660 /* We have to schedule to make sure alignment is ok. */
2661 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2663 /* The hints need to be scheduled, so call it again. */
2664 schedule_insns ();
2665 df_finish_pass (true);
2667 insert_hbrp ();
2669 pad_bb ();
2671 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2672 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2674 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2675 between its branch label and the branch. We don't move the
2676 label because GCC expects it at the beginning of the block. */
2677 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2678 rtx label_ref = XVECEXP (unspec, 0, 0);
2679 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2680 rtx_insn *branch;
2681 int offset = 0;
2682 for (branch = NEXT_INSN (label);
2683 !JUMP_P (branch) && !CALL_P (branch);
2684 branch = NEXT_INSN (branch))
2685 if (NONJUMP_INSN_P (branch))
2686 offset += get_attr_length (branch);
2687 if (offset > 0)
2688 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2691 spu_var_tracking ();
2693 loop_optimizer_finalize ();
2695 free_bb_for_insn ();
2697 in_spu_reorg = 0;
2701 /* Insn scheduling routines, primarily for dual issue. */
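/* The issue model used below: the SPU can issue two insns per cycle,
   one to pipe 0 (the even pipeline, arithmetic) and one to pipe 1 (the
   odd pipeline, loads/stores, shuffles, branches and hints); alignment
   of the pair is handled separately in pad_bb above.  */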
2702 static int
2703 spu_sched_issue_rate (void)
2705 return 2;
2708 static int
2709 uses_ls_unit(rtx_insn *insn)
2711 rtx set = single_set (insn);
2712 if (set != 0
2713 && (GET_CODE (SET_DEST (set)) == MEM
2714 || GET_CODE (SET_SRC (set)) == MEM))
2715 return 1;
2716 return 0;
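/* Classify INSN for the issue logic: 0 means pipe 0 (even), 1 means
   pipe 1 (odd), -1 is used for inline asm and multi0 patterns, and -2
   for converts.  */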
2719 static int
2720 get_pipe (rtx_insn *insn)
2722 enum attr_type t;
2723 /* Handle inline asm */
2724 if (INSN_CODE (insn) == -1)
2725 return -1;
2726 t = get_attr_type (insn);
2727 switch (t)
2729 case TYPE_CONVERT:
2730 return -2;
2731 case TYPE_MULTI0:
2732 return -1;
2734 case TYPE_FX2:
2735 case TYPE_FX3:
2736 case TYPE_SPR:
2737 case TYPE_NOP:
2738 case TYPE_FXB:
2739 case TYPE_FPD:
2740 case TYPE_FP6:
2741 case TYPE_FP7:
2742 return 0;
2744 case TYPE_LNOP:
2745 case TYPE_SHUF:
2746 case TYPE_LOAD:
2747 case TYPE_STORE:
2748 case TYPE_BR:
2749 case TYPE_MULTI1:
2750 case TYPE_HBR:
2751 case TYPE_IPREFETCH:
2752 return 1;
2753 default:
2754 abort ();
2759 /* haifa-sched.c has a static variable that keeps track of the current
2760 cycle. It is passed to spu_sched_reorder, and we record it here for
2761 use by spu_sched_variable_issue. It won't be accurate if the
2762 scheduler updates its clock_var between the two calls. */
2763 static int clock_var;
2765 /* This is used to keep track of insn alignment. Set to 0 at the
2766 beginning of each block and increased by the "length" attr of each
2767 insn scheduled. */
2768 static int spu_sched_length;
2770 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2771 ready list appropriately in spu_sched_reorder(). */
2772 static int pipe0_clock;
2773 static int pipe1_clock;
2775 static int prev_clock_var;
2777 static int prev_priority;
2779 /* The SPU needs to load the next ilb sometime during the execution of
2780 the previous ilb. There is a potential conflict if every cycle has a
2781 load or store. To avoid the conflict we make sure the load/store
2782 unit is free for at least one cycle during the execution of insns in
2783 the previous ilb. */
2784 static int spu_ls_first;
2785 static int prev_ls_clock;
2787 static void
2788 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2789 int max_ready ATTRIBUTE_UNUSED)
2791 spu_sched_length = 0;
2794 static void
2795 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2796 int max_ready ATTRIBUTE_UNUSED)
2798 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2800 /* When any block might be at least 8-byte aligned, assume they
2801 will all be at least 8-byte aligned to make sure dual issue
2802 works out correctly. */
2803 spu_sched_length = 0;
2805 spu_ls_first = INT_MAX;
2806 clock_var = -1;
2807 prev_ls_clock = -1;
2808 pipe0_clock = -1;
2809 pipe1_clock = -1;
2810 prev_clock_var = -1;
2811 prev_priority = -1;
2814 static int
2815 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2816 int verbose ATTRIBUTE_UNUSED,
2817 rtx_insn *insn, int more)
2819 int len;
2820 int p;
2821 if (GET_CODE (PATTERN (insn)) == USE
2822 || GET_CODE (PATTERN (insn)) == CLOBBER
2823 || (len = get_attr_length (insn)) == 0)
2824 return more;
2826 spu_sched_length += len;
2828 /* Reset on inline asm */
2829 if (INSN_CODE (insn) == -1)
2831 spu_ls_first = INT_MAX;
2832 pipe0_clock = -1;
2833 pipe1_clock = -1;
2834 return 0;
2836 p = get_pipe (insn);
2837 if (p == 0)
2838 pipe0_clock = clock_var;
2839 else
2840 pipe1_clock = clock_var;
2842 if (in_spu_reorg)
2844 if (clock_var - prev_ls_clock > 1
2845 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2846 spu_ls_first = INT_MAX;
2847 if (uses_ls_unit (insn))
2849 if (spu_ls_first == INT_MAX)
2850 spu_ls_first = spu_sched_length;
2851 prev_ls_clock = clock_var;
2854 /* The scheduler hasn't inserted the nop, but we will later on.
2855 Include those nops in spu_sched_length. */
2856 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2857 spu_sched_length += 4;
2858 prev_clock_var = clock_var;
2860 /* MORE is -1 when called from spu_sched_reorder for new insns
2861 that don't have INSN_PRIORITY. */
2862 if (more >= 0)
2863 prev_priority = INSN_PRIORITY (insn);
2866 /* Always try issuing more insns. spu_sched_reorder will decide
2867 when the cycle should be advanced. */
2868 return 1;
2871 /* This function is called for both TARGET_SCHED_REORDER and
2872 TARGET_SCHED_REORDER2. */
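/* The return value tells the scheduler how many more insns it may issue
   this cycle: 0 forces it to advance the cycle, 1 lets it issue the
   insn this function has moved to the end of the ready list.  */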
2873 static int
2874 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2875 rtx_insn **ready, int *nreadyp, int clock)
2877 int i, nready = *nreadyp;
2878 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2879 rtx_insn *insn;
2881 clock_var = clock;
2883 if (nready <= 0 || pipe1_clock >= clock)
2884 return 0;
2886 /* Find any rtl insns that don't generate assembly insns and schedule
2887 them first. */
2888 for (i = nready - 1; i >= 0; i--)
2890 insn = ready[i];
2891 if (INSN_CODE (insn) == -1
2892 || INSN_CODE (insn) == CODE_FOR_blockage
2893 || (INSN_P (insn) && get_attr_length (insn) == 0))
2895 ready[i] = ready[nready - 1];
2896 ready[nready - 1] = insn;
2897 return 1;
2901 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2902 for (i = 0; i < nready; i++)
2903 if (INSN_CODE (ready[i]) != -1)
2905 insn = ready[i];
2906 switch (get_attr_type (insn))
2908 default:
2909 case TYPE_MULTI0:
2910 case TYPE_CONVERT:
2911 case TYPE_FX2:
2912 case TYPE_FX3:
2913 case TYPE_SPR:
2914 case TYPE_NOP:
2915 case TYPE_FXB:
2916 case TYPE_FPD:
2917 case TYPE_FP6:
2918 case TYPE_FP7:
2919 pipe_0 = i;
2920 break;
2921 case TYPE_LOAD:
2922 case TYPE_STORE:
2923 pipe_ls = i;
2924 case TYPE_LNOP:
2925 case TYPE_SHUF:
2926 case TYPE_BR:
2927 case TYPE_MULTI1:
2928 case TYPE_HBR:
2929 pipe_1 = i;
2930 break;
2931 case TYPE_IPREFETCH:
2932 pipe_hbrp = i;
2933 break;
2937 /* In the first scheduling phase, schedule loads and stores together
2938 to increase the chance they will get merged during postreload CSE. */
2939 if (!reload_completed && pipe_ls >= 0)
2941 insn = ready[pipe_ls];
2942 ready[pipe_ls] = ready[nready - 1];
2943 ready[nready - 1] = insn;
2944 return 1;
2947 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2948 if (pipe_hbrp >= 0)
2949 pipe_1 = pipe_hbrp;
2951 /* When we have loads/stores in every cycle of the last 15 insns and
2952 we are about to schedule another load/store, emit an hbrp insn
2953 instead. */
2954 if (in_spu_reorg
2955 && spu_sched_length - spu_ls_first >= 4 * 15
2956 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2958 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2959 recog_memoized (insn);
2960 if (pipe0_clock < clock)
2961 PUT_MODE (insn, TImode);
2962 spu_sched_variable_issue (file, verbose, insn, -1);
2963 return 0;
2966 /* In general, we want to emit nops to increase dual issue, but dual
2967 issue isn't faster when one of the insns could be scheduled later
2968 without affecting the critical path. We look at INSN_PRIORITY to
2969 make a good guess, but it isn't perfect so -mdual-nops=n can be
2970 used to affect it. */
2971 if (in_spu_reorg && spu_dual_nops < 10)
2973 /* When we are at an even address and we are not issuing nops to
2974 improve scheduling then we need to advance the cycle. */
2975 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2976 && (spu_dual_nops == 0
2977 || (pipe_1 != -1
2978 && prev_priority >
2979 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2980 return 0;
2982 /* When at an odd address, schedule the highest priority insn
2983 without considering pipeline. */
2984 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2985 && (spu_dual_nops == 0
2986 || (prev_priority >
2987 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2988 return 1;
2992 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2993 pipe0 insn in the ready list, schedule it. */
2994 if (pipe0_clock < clock && pipe_0 >= 0)
2995 schedule_i = pipe_0;
2997 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2998 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2999 else
3000 schedule_i = pipe_1;
3002 if (schedule_i > -1)
3004 insn = ready[schedule_i];
3005 ready[schedule_i] = ready[nready - 1];
3006 ready[nready - 1] = insn;
3007 return 1;
3009 return 0;
3012 /* INSN is dependent on DEP_INSN. */
3013 static int
3014 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3016 rtx set;
3018 /* The blockage pattern is used to prevent instructions from being
3019 moved across it and has no cost. */
3020 if (INSN_CODE (insn) == CODE_FOR_blockage
3021 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3022 return 0;
3024 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3025 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3026 return 0;
3028 /* Make sure hbrps are spread out. */
3029 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3030 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3031 return 8;
3033 /* Make sure hints and hbrps are 2 cycles apart. */
3034 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3035 || INSN_CODE (insn) == CODE_FOR_hbr)
3036 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3037 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3038 return 2;
3040 /* An hbrp has no real dependency on other insns. */
3041 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3042 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3043 return 0;
3045 /* Assuming that it is unlikely an argument register will be used in
3046 the first cycle of the called function, we reduce the cost for
3047 slightly better scheduling of dep_insn. When not hinted, the
3048 mispredicted branch would hide the cost as well. */
3049 if (CALL_P (insn))
3051 rtx target = get_branch_target (insn);
3052 if (GET_CODE (target) != REG || !set_of (target, insn))
3053 return cost - 2;
3054 return cost;
3057 /* And when returning from a function, let's assume the return values
3058 are completed sooner too. */
3059 if (CALL_P (dep_insn))
3060 return cost - 2;
3062 /* Make sure an instruction that loads from the back chain is scheduled
3063 away from the return instruction so a hint is more likely to get
3064 issued. */
3065 if (INSN_CODE (insn) == CODE_FOR__return
3066 && (set = single_set (dep_insn))
3067 && GET_CODE (SET_DEST (set)) == REG
3068 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3069 return 20;
3071 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3072 scheduler makes every insn in a block anti-dependent on the final
3073 jump_insn. We adjust here so higher cost insns will get scheduled
3074 earlier. */
3075 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3076 return insn_cost (dep_insn) - 3;
3078 return cost;
3081 /* Create a CONST_DOUBLE from a string. */
3083 spu_float_const (const char *string, machine_mode mode)
3085 REAL_VALUE_TYPE value;
3086 value = REAL_VALUE_ATOF (string, mode);
3087 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3091 spu_constant_address_p (rtx x)
3093 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3094 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3095 || GET_CODE (x) == HIGH);
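/* Some worked examples for the classification below (SImode values):
   0x00002345 fits the signed 16-bit il range, 0x00012345 fits the
   18-bit unsigned ila range, 0xabcdabcd has identical halfwords and so
   suits ilh, and 0x12340000 has a zero low halfword and so suits ilhu.  */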
3098 static enum spu_immediate
3099 which_immediate_load (HOST_WIDE_INT val)
3101 gcc_assert (val == trunc_int_for_mode (val, SImode));
3103 if (val >= -0x8000 && val <= 0x7fff)
3104 return SPU_IL;
3105 if (val >= 0 && val <= 0x3ffff)
3106 return SPU_ILA;
3107 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3108 return SPU_ILH;
3109 if ((val & 0xffff) == 0)
3110 return SPU_ILHU;
3112 return SPU_NONE;
3115 /* Return true when OP can be loaded by one of the il instructions, or
3116 when the epilogue is not yet generated and OP can be loaded using ilhu and iohl. */
3118 immediate_load_p (rtx op, machine_mode mode)
3120 if (CONSTANT_P (op))
3122 enum immediate_class c = classify_immediate (op, mode);
3123 return c == IC_IL1 || c == IC_IL1s
3124 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3126 return 0;
3129 /* Return true if the first SIZE bytes of ARR form a constant that can be
3130 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3131 represent the size and offset of the instruction to use. */
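/* As an illustration of what the loop below accepts: a word insert at
   byte offset 8 (run == 4, start == 8) uses the control bytes
   { 0x10,...,0x17, 0x00,0x01,0x02,0x03, 0x1c,0x1d,0x1e,0x1f }, i.e.
   i+16 everywhere except a run of 0..run-1 at the insertion point;
   cbd uses the single byte 0x03 and chd the pair 0x02,0x03.  */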
3132 static int
3133 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3135 int cpat, run, i, start;
3136 cpat = 1;
3137 run = 0;
3138 start = -1;
3139 for (i = 0; i < size && cpat; i++)
3140 if (arr[i] != i+16)
3142 if (!run)
3144 start = i;
3145 if (arr[i] == 3)
3146 run = 1;
3147 else if (arr[i] == 2 && arr[i+1] == 3)
3148 run = 2;
3149 else if (arr[i] == 0)
3151 while (arr[i+run] == run && i+run < 16)
3152 run++;
3153 if (run != 4 && run != 8)
3154 cpat = 0;
3156 else
3157 cpat = 0;
3158 if ((i & (run-1)) != 0)
3159 cpat = 0;
3160 i += run;
3162 else
3163 cpat = 0;
3165 if (cpat && (run || size < 16))
3167 if (run == 0)
3168 run = 1;
3169 if (prun)
3170 *prun = run;
3171 if (pstart)
3172 *pstart = start == -1 ? 16-run : start;
3173 return 1;
3175 return 0;
3178 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3179 it into a register. MODE is only valid when OP is a CONST_INT. */
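/* Summary of the classes as determined here: IC_IL1 is loadable with a
   single il-family insn and IC_IL2 is a repeated word needing two; the
   "s" variants are their symbolic counterparts; IC_FSMBI covers
   constants whose non-zero bytes are all 0xff (a plain fsmbi mask) and
   IC_FSMBI2 those built from some other repeated byte; IC_CPAT matches
   a cbd/chd/cwd/cdd control pattern; everything else is IC_POOL and
   must come from the constant pool.  */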
3180 static enum immediate_class
3181 classify_immediate (rtx op, machine_mode mode)
3183 HOST_WIDE_INT val;
3184 unsigned char arr[16];
3185 int i, j, repeated, fsmbi, repeat;
3187 gcc_assert (CONSTANT_P (op));
3189 if (GET_MODE (op) != VOIDmode)
3190 mode = GET_MODE (op);
3192 /* A V4SI const_vector with all identical symbols is ok. */
3193 if (!flag_pic
3194 && mode == V4SImode
3195 && GET_CODE (op) == CONST_VECTOR
3196 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3197 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3198 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3199 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3200 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3201 op = CONST_VECTOR_ELT (op, 0);
3203 switch (GET_CODE (op))
3205 case SYMBOL_REF:
3206 case LABEL_REF:
3207 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3209 case CONST:
3210 /* We can never know if the resulting address fits in 18 bits and can be
3211 loaded with ila. For now, assume the address will not overflow if
3212 the displacement is "small" (fits 'K' constraint). */
3213 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3215 rtx sym = XEXP (XEXP (op, 0), 0);
3216 rtx cst = XEXP (XEXP (op, 0), 1);
3218 if (GET_CODE (sym) == SYMBOL_REF
3219 && GET_CODE (cst) == CONST_INT
3220 && satisfies_constraint_K (cst))
3221 return IC_IL1s;
3223 return IC_IL2s;
3225 case HIGH:
3226 return IC_IL1s;
3228 case CONST_VECTOR:
3229 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3230 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3231 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3232 return IC_POOL;
3233 /* Fall through. */
3235 case CONST_INT:
3236 case CONST_DOUBLE:
3237 constant_to_array (mode, op, arr);
3239 /* Check that each 4-byte slot is identical. */
3240 repeated = 1;
3241 for (i = 4; i < 16; i += 4)
3242 for (j = 0; j < 4; j++)
3243 if (arr[j] != arr[i + j])
3244 repeated = 0;
3246 if (repeated)
3248 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3249 val = trunc_int_for_mode (val, SImode);
3251 if (which_immediate_load (val) != SPU_NONE)
3252 return IC_IL1;
3255 /* Any mode of 2 bytes or smaller can be loaded with an il
3256 instruction. */
3257 gcc_assert (GET_MODE_SIZE (mode) > 2);
3259 fsmbi = 1;
3260 repeat = 0;
3261 for (i = 0; i < 16 && fsmbi; i++)
3262 if (arr[i] != 0 && repeat == 0)
3263 repeat = arr[i];
3264 else if (arr[i] != 0 && arr[i] != repeat)
3265 fsmbi = 0;
3266 if (fsmbi)
3267 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3269 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3270 return IC_CPAT;
3272 if (repeated)
3273 return IC_IL2;
3275 return IC_POOL;
3276 default:
3277 break;
3279 gcc_unreachable ();
3282 static enum spu_immediate
3283 which_logical_immediate (HOST_WIDE_INT val)
3285 gcc_assert (val == trunc_int_for_mode (val, SImode));
3287 if (val >= -0x200 && val <= 0x1ff)
3288 return SPU_ORI;
3289 if (val >= 0 && val <= 0xffff)
3290 return SPU_IOHL;
3291 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3293 val = trunc_int_for_mode (val, HImode);
3294 if (val >= -0x200 && val <= 0x1ff)
3295 return SPU_ORHI;
3296 if ((val & 0xff) == ((val >> 8) & 0xff))
3298 val = trunc_int_for_mode (val, QImode);
3299 if (val >= -0x200 && val <= 0x1ff)
3300 return SPU_ORBI;
3303 return SPU_NONE;
3306 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3307 CONST_DOUBLEs. */
3308 static int
3309 const_vector_immediate_p (rtx x)
3311 int i;
3312 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3313 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3314 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3315 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3316 return 0;
3317 return 1;
3321 logical_immediate_p (rtx op, machine_mode mode)
3323 HOST_WIDE_INT val;
3324 unsigned char arr[16];
3325 int i, j;
3327 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3328 || GET_CODE (op) == CONST_VECTOR);
3330 if (GET_CODE (op) == CONST_VECTOR
3331 && !const_vector_immediate_p (op))
3332 return 0;
3334 if (GET_MODE (op) != VOIDmode)
3335 mode = GET_MODE (op);
3337 constant_to_array (mode, op, arr);
3339 /* Check that bytes are repeated. */
3340 for (i = 4; i < 16; i += 4)
3341 for (j = 0; j < 4; j++)
3342 if (arr[j] != arr[i + j])
3343 return 0;
3345 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3346 val = trunc_int_for_mode (val, SImode);
3348 i = which_logical_immediate (val);
3349 return i != SPU_NONE && i != SPU_IOHL;
3353 iohl_immediate_p (rtx op, machine_mode mode)
3355 HOST_WIDE_INT val;
3356 unsigned char arr[16];
3357 int i, j;
3359 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3360 || GET_CODE (op) == CONST_VECTOR);
3362 if (GET_CODE (op) == CONST_VECTOR
3363 && !const_vector_immediate_p (op))
3364 return 0;
3366 if (GET_MODE (op) != VOIDmode)
3367 mode = GET_MODE (op);
3369 constant_to_array (mode, op, arr);
3371 /* Check that bytes are repeated. */
3372 for (i = 4; i < 16; i += 4)
3373 for (j = 0; j < 4; j++)
3374 if (arr[j] != arr[i + j])
3375 return 0;
3377 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3378 val = trunc_int_for_mode (val, SImode);
3380 return val >= 0 && val <= 0xffff;
3384 arith_immediate_p (rtx op, machine_mode mode,
3385 HOST_WIDE_INT low, HOST_WIDE_INT high)
3387 HOST_WIDE_INT val;
3388 unsigned char arr[16];
3389 int bytes, i, j;
3391 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3392 || GET_CODE (op) == CONST_VECTOR);
3394 if (GET_CODE (op) == CONST_VECTOR
3395 && !const_vector_immediate_p (op))
3396 return 0;
3398 if (GET_MODE (op) != VOIDmode)
3399 mode = GET_MODE (op);
3401 constant_to_array (mode, op, arr);
3403 if (VECTOR_MODE_P (mode))
3404 mode = GET_MODE_INNER (mode);
3406 bytes = GET_MODE_SIZE (mode);
3407 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3409 /* Check that bytes are repeated. */
3410 for (i = bytes; i < 16; i += bytes)
3411 for (j = 0; j < bytes; j++)
3412 if (arr[j] != arr[i + j])
3413 return 0;
3415 val = arr[0];
3416 for (j = 1; j < bytes; j++)
3417 val = (val << 8) | arr[j];
3419 val = trunc_int_for_mode (val, mode);
3421 return val >= low && val <= high;
3424 /* TRUE when op is an immediate and an exact power of 2, and given that
3425 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3426 all entries must be the same. */
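/* For example, the SFmode constant 32.0f is 0x42000000: its mantissa
   bits are zero and (0x42000000 >> 23) - 127 == 5, so it is accepted
   whenever LOW <= 5 and 5 <= HIGH.  */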
3427 bool
3428 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3430 machine_mode int_mode;
3431 HOST_WIDE_INT val;
3432 unsigned char arr[16];
3433 int bytes, i, j;
3435 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3436 || GET_CODE (op) == CONST_VECTOR);
3438 if (GET_CODE (op) == CONST_VECTOR
3439 && !const_vector_immediate_p (op))
3440 return 0;
3442 if (GET_MODE (op) != VOIDmode)
3443 mode = GET_MODE (op);
3445 constant_to_array (mode, op, arr);
3447 if (VECTOR_MODE_P (mode))
3448 mode = GET_MODE_INNER (mode);
3450 bytes = GET_MODE_SIZE (mode);
3451 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3453 /* Check that bytes are repeated. */
3454 for (i = bytes; i < 16; i += bytes)
3455 for (j = 0; j < bytes; j++)
3456 if (arr[j] != arr[i + j])
3457 return 0;
3459 val = arr[0];
3460 for (j = 1; j < bytes; j++)
3461 val = (val << 8) | arr[j];
3463 val = trunc_int_for_mode (val, int_mode);
3465 /* Currently, we only handle SFmode */
3466 gcc_assert (mode == SFmode);
3467 if (mode == SFmode)
3469 int exp = (val >> 23) - 127;
3470 return val > 0 && (val & 0x007fffff) == 0
3471 && exp >= low && exp <= high;
3473 return FALSE;
3476 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3478 static bool
3479 ea_symbol_ref_p (const_rtx x)
3481 tree decl;
3483 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3485 rtx plus = XEXP (x, 0);
3486 rtx op0 = XEXP (plus, 0);
3487 rtx op1 = XEXP (plus, 1);
3488 if (GET_CODE (op1) == CONST_INT)
3489 x = op0;
3492 return (GET_CODE (x) == SYMBOL_REF
3493 && (decl = SYMBOL_REF_DECL (x)) != 0
3494 && TREE_CODE (decl) == VAR_DECL
3495 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3498 /* We accept:
3499 - any 32-bit constant (SImode, SFmode)
3500 - any constant that can be generated with fsmbi (any mode)
3501 - a 64-bit constant where the high and low bits are identical
3502 (DImode, DFmode)
3503 - a 128-bit constant where the four 32-bit words match. */
3504 bool
3505 spu_legitimate_constant_p (machine_mode mode, rtx x)
3507 subrtx_iterator::array_type array;
3508 if (GET_CODE (x) == HIGH)
3509 x = XEXP (x, 0);
3511 /* Reject any __ea qualified reference. These can't appear in
3512 instructions but must be forced to the constant pool. */
3513 FOR_EACH_SUBRTX (iter, array, x, ALL)
3514 if (ea_symbol_ref_p (*iter))
3515 return 0;
3517 /* V4SI with all identical symbols is valid. */
3518 if (!flag_pic
3519 && mode == V4SImode
3520 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3521 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3522 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3523 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3524 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3525 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3527 if (GET_CODE (x) == CONST_VECTOR
3528 && !const_vector_immediate_p (x))
3529 return 0;
3530 return 1;
3533 /* Valid addresses are:
3534 - symbol_ref, label_ref, const
3535 - reg
3536 - reg + const_int, where const_int is 16 byte aligned
3537 - reg + reg, alignment doesn't matter
3538 The alignment matters in the reg+const case because lqd and stqd
3539 ignore the 4 least significant bits of the const. We only care about
3540 16 byte modes because the expand phase will change all smaller MEM
3541 references to TImode. */
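/* For example, (reg + 32) is acceptable for a 16-byte access, but
   (reg + 20) is rejected here: lqd and stqd would silently drop the low
   four bits and access (reg + 16) instead.  */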
3542 static bool
3543 spu_legitimate_address_p (machine_mode mode,
3544 rtx x, bool reg_ok_strict)
3546 int aligned = GET_MODE_SIZE (mode) >= 16;
3547 if (aligned
3548 && GET_CODE (x) == AND
3549 && GET_CODE (XEXP (x, 1)) == CONST_INT
3550 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3551 x = XEXP (x, 0);
3552 switch (GET_CODE (x))
3554 case LABEL_REF:
3555 return !TARGET_LARGE_MEM;
3557 case SYMBOL_REF:
3558 case CONST:
3559 /* Keep __ea references until reload so that spu_expand_mov can see them
3560 in MEMs. */
3561 if (ea_symbol_ref_p (x))
3562 return !reload_in_progress && !reload_completed;
3563 return !TARGET_LARGE_MEM;
3565 case CONST_INT:
3566 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3568 case SUBREG:
3569 x = XEXP (x, 0);
3570 if (REG_P (x))
3571 return 0;
3573 case REG:
3574 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3576 case PLUS:
3577 case LO_SUM:
3579 rtx op0 = XEXP (x, 0);
3580 rtx op1 = XEXP (x, 1);
3581 if (GET_CODE (op0) == SUBREG)
3582 op0 = XEXP (op0, 0);
3583 if (GET_CODE (op1) == SUBREG)
3584 op1 = XEXP (op1, 0);
3585 if (GET_CODE (op0) == REG
3586 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3587 && GET_CODE (op1) == CONST_INT
3588 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3589 /* If virtual registers are involved, the displacement will
3590 change later on anyway, so checking would be premature.
3591 Reload will make sure the final displacement after
3592 register elimination is OK. */
3593 || op0 == arg_pointer_rtx
3594 || op0 == frame_pointer_rtx
3595 || op0 == virtual_stack_vars_rtx)
3596 && (!aligned || (INTVAL (op1) & 15) == 0))
3597 return TRUE;
3598 if (GET_CODE (op0) == REG
3599 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3600 && GET_CODE (op1) == REG
3601 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3602 return TRUE;
3604 break;
3606 default:
3607 break;
3609 return FALSE;
3612 /* Like spu_legitimate_address_p, except with named addresses. */
3613 static bool
3614 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3615 bool reg_ok_strict, addr_space_t as)
3617 if (as == ADDR_SPACE_EA)
3618 return (REG_P (x) && (GET_MODE (x) == EAmode));
3620 else if (as != ADDR_SPACE_GENERIC)
3621 gcc_unreachable ();
3623 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3626 /* When the address is reg + const_int, force the const_int into a
3627 register. */
3628 static rtx
3629 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3630 machine_mode mode ATTRIBUTE_UNUSED)
3632 rtx op0, op1;
3633 /* Make sure both operands are registers. */
3634 if (GET_CODE (x) == PLUS)
3636 op0 = XEXP (x, 0);
3637 op1 = XEXP (x, 1);
3638 if (ALIGNED_SYMBOL_REF_P (op0))
3640 op0 = force_reg (Pmode, op0);
3641 mark_reg_pointer (op0, 128);
3643 else if (GET_CODE (op0) != REG)
3644 op0 = force_reg (Pmode, op0);
3645 if (ALIGNED_SYMBOL_REF_P (op1))
3647 op1 = force_reg (Pmode, op1);
3648 mark_reg_pointer (op1, 128);
3650 else if (GET_CODE (op1) != REG)
3651 op1 = force_reg (Pmode, op1);
3652 x = gen_rtx_PLUS (Pmode, op0, op1);
3654 return x;
3657 /* Like spu_legitimize_address, except with named address support. */
3658 static rtx
3659 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3660 addr_space_t as)
3662 if (as != ADDR_SPACE_GENERIC)
3663 return x;
3665 return spu_legitimize_address (x, oldx, mode);
3668 /* Reload reg + const_int for out-of-range displacements. */
3670 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3671 int opnum, int type)
3673 bool removed_and = false;
3675 if (GET_CODE (ad) == AND
3676 && CONST_INT_P (XEXP (ad, 1))
3677 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3679 ad = XEXP (ad, 0);
3680 removed_and = true;
3683 if (GET_CODE (ad) == PLUS
3684 && REG_P (XEXP (ad, 0))
3685 && CONST_INT_P (XEXP (ad, 1))
3686 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3687 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3689 /* Unshare the sum. */
3690 ad = copy_rtx (ad);
3692 /* Reload the displacement. */
3693 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3694 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3695 opnum, (enum reload_type) type);
3697 /* Add back AND for alignment if we stripped it. */
3698 if (removed_and)
3699 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3701 return ad;
3704 return NULL_RTX;
3707 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3708 struct attribute_spec.handler. */
3709 static tree
3710 spu_handle_fndecl_attribute (tree * node,
3711 tree name,
3712 tree args ATTRIBUTE_UNUSED,
3713 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3715 if (TREE_CODE (*node) != FUNCTION_DECL)
3717 warning (0, "%qE attribute only applies to functions",
3718 name);
3719 *no_add_attrs = true;
3722 return NULL_TREE;
3725 /* Handle the "vector" attribute. */
3726 static tree
3727 spu_handle_vector_attribute (tree * node, tree name,
3728 tree args ATTRIBUTE_UNUSED,
3729 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3731 tree type = *node, result = NULL_TREE;
3732 machine_mode mode;
3733 int unsigned_p;
3735 while (POINTER_TYPE_P (type)
3736 || TREE_CODE (type) == FUNCTION_TYPE
3737 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3738 type = TREE_TYPE (type);
3740 mode = TYPE_MODE (type);
3742 unsigned_p = TYPE_UNSIGNED (type);
3743 switch (mode)
3745 case DImode:
3746 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3747 break;
3748 case SImode:
3749 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3750 break;
3751 case HImode:
3752 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3753 break;
3754 case QImode:
3755 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3756 break;
3757 case SFmode:
3758 result = V4SF_type_node;
3759 break;
3760 case DFmode:
3761 result = V2DF_type_node;
3762 break;
3763 default:
3764 break;
3767 /* Propagate qualifiers attached to the element type
3768 onto the vector type. */
3769 if (result && result != type && TYPE_QUALS (type))
3770 result = build_qualified_type (result, TYPE_QUALS (type));
3772 *no_add_attrs = true; /* No need to hang on to the attribute. */
3774 if (!result)
3775 warning (0, "%qE attribute ignored", name);
3776 else
3777 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3779 return NULL_TREE;
3782 /* Return nonzero if FUNC is a naked function. */
3783 static int
3784 spu_naked_function_p (tree func)
3786 tree a;
3788 if (TREE_CODE (func) != FUNCTION_DECL)
3789 abort ();
3791 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3792 return a != NULL_TREE;
3796 spu_initial_elimination_offset (int from, int to)
3798 int saved_regs_size = spu_saved_regs_size ();
3799 int sp_offset = 0;
3800 if (!crtl->is_leaf || crtl->outgoing_args_size
3801 || get_frame_size () || saved_regs_size)
3802 sp_offset = STACK_POINTER_OFFSET;
3803 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3804 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3805 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3806 return get_frame_size ();
3807 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3808 return sp_offset + crtl->outgoing_args_size
3809 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3810 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3811 return get_frame_size () + saved_regs_size + sp_offset;
3812 else
3813 gcc_unreachable ();
3817 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3819 machine_mode mode = TYPE_MODE (type);
3820 int byte_size = ((mode == BLKmode)
3821 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3823 /* Make sure small structs are left justified in a register. */
3824 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3825 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3827 machine_mode smode;
3828 rtvec v;
3829 int i;
3830 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3831 int n = byte_size / UNITS_PER_WORD;
3832 v = rtvec_alloc (nregs);
3833 for (i = 0; i < n; i++)
3835 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3836 gen_rtx_REG (TImode,
3837 FIRST_RETURN_REGNUM
3838 + i),
3839 GEN_INT (UNITS_PER_WORD * i));
3840 byte_size -= UNITS_PER_WORD;
3843 if (n < nregs)
3845 if (byte_size < 4)
3846 byte_size = 4;
3847 smode =
3848 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3849 RTVEC_ELT (v, n) =
3850 gen_rtx_EXPR_LIST (VOIDmode,
3851 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3852 GEN_INT (UNITS_PER_WORD * n));
3854 return gen_rtx_PARALLEL (mode, v);
3856 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3859 static rtx
3860 spu_function_arg (cumulative_args_t cum_v,
3861 machine_mode mode,
3862 const_tree type, bool named ATTRIBUTE_UNUSED)
3864 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3865 int byte_size;
3867 if (*cum >= MAX_REGISTER_ARGS)
3868 return 0;
3870 byte_size = ((mode == BLKmode)
3871 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3873 /* The ABI does not allow parameters to be passed partly in a
3874 register and partly on the stack. */
3875 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3876 return 0;
3878 /* Make sure small structs are left justified in a register. */
3879 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3880 && byte_size < UNITS_PER_WORD && byte_size > 0)
3882 machine_mode smode;
3883 rtx gr_reg;
3884 if (byte_size < 4)
3885 byte_size = 4;
3886 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3887 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3888 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3889 const0_rtx);
3890 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3892 else
3893 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3896 static void
3897 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3898 const_tree type, bool named ATTRIBUTE_UNUSED)
3900 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3902 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3904 : mode == BLKmode
3905 ? ((int_size_in_bytes (type) + 15) / 16)
3906 : mode == VOIDmode
3908 : HARD_REGNO_NREGS (cum, mode));
3911 /* Variable sized types are passed by reference. */
3912 static bool
3913 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3914 machine_mode mode ATTRIBUTE_UNUSED,
3915 const_tree type, bool named ATTRIBUTE_UNUSED)
3917 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3921 /* Var args. */
3923 /* Create and return the va_list datatype.
3925 On SPU, va_list is an array type equivalent to
3927 typedef struct __va_list_tag
3929 void *__args __attribute__((__aligned(16)));
3930 void *__skip __attribute__((__aligned(16)));
3932 } va_list[1];
3934 where __args points to the arg that will be returned by the next
3935 va_arg(), and __skip points to the previous stack frame such that
3936 when __args == __skip we should advance __args by 32 bytes. */
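/* The 32 bytes skipped appear to be STACK_POINTER_OFFSET; spu_va_start
   below subtracts the same constant when computing the initial __args
   and __skip values.  */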
3937 static tree
3938 spu_build_builtin_va_list (void)
3940 tree f_args, f_skip, record, type_decl;
3941 bool owp;
3943 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3945 type_decl =
3946 build_decl (BUILTINS_LOCATION,
3947 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3949 f_args = build_decl (BUILTINS_LOCATION,
3950 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3951 f_skip = build_decl (BUILTINS_LOCATION,
3952 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3954 DECL_FIELD_CONTEXT (f_args) = record;
3955 DECL_ALIGN (f_args) = 128;
3956 DECL_USER_ALIGN (f_args) = 1;
3958 DECL_FIELD_CONTEXT (f_skip) = record;
3959 DECL_ALIGN (f_skip) = 128;
3960 DECL_USER_ALIGN (f_skip) = 1;
3962 TYPE_STUB_DECL (record) = type_decl;
3963 TYPE_NAME (record) = type_decl;
3964 TYPE_FIELDS (record) = f_args;
3965 DECL_CHAIN (f_args) = f_skip;
3967 /* We know this is being padded and we want it that way. It is an internal
3968 type so hide the warnings from the user. */
3969 owp = warn_padded;
3970 warn_padded = false;
3972 layout_type (record);
3974 warn_padded = owp;
3976 /* The correct type is an array type of one element. */
3977 return build_array_type (record, build_index_type (size_zero_node));
3980 /* Implement va_start by filling the va_list structure VALIST.
3981 NEXTARG points to the first anonymous stack argument.
3983 The following global variables are used to initialize
3984 the va_list structure:
3986 crtl->args.info:
3987 the CUMULATIVE_ARGS for this function
3989 crtl->args.arg_offset_rtx:
3990 holds the offset of the first anonymous stack argument
3991 (relative to the virtual arg pointer). */
3993 static void
3994 spu_va_start (tree valist, rtx nextarg)
3996 tree f_args, f_skip;
3997 tree args, skip, t;
3999 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4000 f_skip = DECL_CHAIN (f_args);
4002 valist = build_simple_mem_ref (valist);
4003 args =
4004 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4005 skip =
4006 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4008 /* Find the __args area. */
4009 t = make_tree (TREE_TYPE (args), nextarg);
4010 if (crtl->args.pretend_args_size > 0)
4011 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4012 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4013 TREE_SIDE_EFFECTS (t) = 1;
4014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4016 /* Find the __skip area. */
4017 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4018 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4019 - STACK_POINTER_OFFSET));
4020 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4021 TREE_SIDE_EFFECTS (t) = 1;
4022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4025 /* Gimplify va_arg by updating the va_list structure
4026 VALIST as required to retrieve an argument of type
4027 TYPE, and returning that argument.
4029 ret = va_arg(VALIST, TYPE);
4031 generates code equivalent to:
4033 paddedsize = (sizeof(TYPE) + 15) & -16;
4034 if (VALIST.__args + paddedsize > VALIST.__skip
4035 && VALIST.__args <= VALIST.__skip)
4036 addr = VALIST.__skip + 32;
4037 else
4038 addr = VALIST.__args;
4039 VALIST.__args = addr + paddedsize;
4040 ret = *(TYPE *)addr;
4042 static tree
4043 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4044 gimple_seq * post_p ATTRIBUTE_UNUSED)
4046 tree f_args, f_skip;
4047 tree args, skip;
4048 HOST_WIDE_INT size, rsize;
4049 tree addr, tmp;
4050 bool pass_by_reference_p;
4052 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4053 f_skip = DECL_CHAIN (f_args);
4055 args =
4056 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4057 skip =
4058 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4060 addr = create_tmp_var (ptr_type_node, "va_arg");
4062 /* if an object is dynamically sized, a pointer to it is passed
4063 instead of the object itself. */
4064 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4065 false);
4066 if (pass_by_reference_p)
4067 type = build_pointer_type (type);
4068 size = int_size_in_bytes (type);
4069 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4071 /* build conditional expression to calculate addr. The expression
4072 will be gimplified later. */
4073 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4074 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4075 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4076 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4077 unshare_expr (skip)));
4079 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4080 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4081 unshare_expr (args));
4083 gimplify_assign (addr, tmp, pre_p);
4085 /* update VALIST.__args */
4086 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4087 gimplify_assign (unshare_expr (args), tmp, pre_p);
4089 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4090 addr);
4092 if (pass_by_reference_p)
4093 addr = build_va_arg_indirect_ref (addr);
4095 return build_va_arg_indirect_ref (addr);
4098 /* Save parameter registers starting with the register that corresponds
4099 to the first unnamed parameter. If the first unnamed parameter is
4100 in the stack then save no registers. Set pretend_args_size to the
4101 amount of space needed to save the registers. */
4102 static void
4103 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4104 tree type, int *pretend_size, int no_rtl)
4106 if (!no_rtl)
4108 rtx tmp;
4109 int regno;
4110 int offset;
4111 int ncum = *get_cumulative_args (cum);
4113 /* CUM currently points to the last named argument; we want to
4114 start at the next argument. */
4115 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4117 offset = -STACK_POINTER_OFFSET;
4118 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4120 tmp = gen_frame_mem (V4SImode,
4121 plus_constant (Pmode, virtual_incoming_args_rtx,
4122 offset));
4123 emit_move_insn (tmp,
4124 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4125 offset += 16;
4127 *pretend_size = offset + STACK_POINTER_OFFSET;
4131 static void
4132 spu_conditional_register_usage (void)
4134 if (flag_pic)
4136 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4137 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4141 /* This is called any time we inspect the alignment of a register for
4142 addresses. */
4143 static int
4144 reg_aligned_for_addr (rtx x)
4146 int regno =
4147 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4148 return REGNO_POINTER_ALIGN (regno) >= 128;
4151 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4152 into its SYMBOL_REF_FLAGS. */
4153 static void
4154 spu_encode_section_info (tree decl, rtx rtl, int first)
4156 default_encode_section_info (decl, rtl, first);
4158 /* If a variable has a forced alignment to < 16 bytes, mark it with
4159 SYMBOL_FLAG_ALIGN1. */
4160 if (TREE_CODE (decl) == VAR_DECL
4161 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4162 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4165 /* Return TRUE if we are certain the mem refers to a complete object
4166 which is both 16-byte aligned and padded to a 16-byte boundary. This
4167 would make it safe to store with a single instruction.
4168 We guarantee the alignment and padding for static objects by aligning
4169 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4170 FIXME: We currently cannot guarantee this for objects on the stack
4171 because assign_parm_setup_stack calls assign_stack_local with the
4172 alignment of the parameter mode and in that case the alignment never
4173 gets adjusted by LOCAL_ALIGNMENT. */
4174 static int
4175 store_with_one_insn_p (rtx mem)
4177 machine_mode mode = GET_MODE (mem);
4178 rtx addr = XEXP (mem, 0);
4179 if (mode == BLKmode)
4180 return 0;
4181 if (GET_MODE_SIZE (mode) >= 16)
4182 return 1;
4183 /* Only static objects. */
4184 if (GET_CODE (addr) == SYMBOL_REF)
4186 /* We use the associated declaration to make sure the access is
4187 referring to the whole object.
4188 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4189 if it is necessary. Will there be cases where one exists, and
4190 the other does not? Will there be cases where both exist, but
4191 have different types? */
4192 tree decl = MEM_EXPR (mem);
4193 if (decl
4194 && TREE_CODE (decl) == VAR_DECL
4195 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4196 return 1;
4197 decl = SYMBOL_REF_DECL (addr);
4198 if (decl
4199 && TREE_CODE (decl) == VAR_DECL
4200 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4201 return 1;
4203 return 0;
4206 /* Return 1 when the address is not valid for a simple load and store as
4207 required by the '_mov*' patterns. We could make this less strict
4208 for loads, but we prefer MEMs to look the same so they are more
4209 likely to be merged. */
4210 static int
4211 address_needs_split (rtx mem)
4213 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4214 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4215 || !(store_with_one_insn_p (mem)
4216 || mem_is_padded_component_ref (mem))))
4217 return 1;
4219 return 0;
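/* Illustrative summary of the test above (no new behaviour): accesses of
   16 bytes or more never need a split; accesses smaller than 4 bytes
   always do; accesses of 4 to 15 bytes need one unless the whole padded
   object is covered (store_with_one_insn_p) or the mem is a padded
   component reference.  */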
4222 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4223 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4224 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4226 /* MEM is known to be an __ea qualified memory access. Emit a call to
4227 fetch the PPU memory to local store, and return its address in local
4228 store. */
4230 static void
4231 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4233 if (is_store)
4235 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4236 if (!cache_fetch_dirty)
4237 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4238 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4239 2, ea_addr, EAmode, ndirty, SImode);
4241 else
4243 if (!cache_fetch)
4244 cache_fetch = init_one_libfunc ("__cache_fetch");
4245 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4246 1, ea_addr, EAmode);
4250 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4251 dirty bit marking, inline.
4253 The cache control data structure is an array of
4255 struct __cache_tag_array
4257 unsigned int tag_lo[4];
4258 unsigned int tag_hi[4];
4259 void *data_pointer[4];
4260 int reserved[4];
4261 vector unsigned short dirty_bits[4];
4262 } */
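/* A minimal C sketch of the lookup the code below emits, for the 32-bit
   __ea case only.  This is illustrative and not part of the run-time
   library; the function and parameter names are hypothetical, and
   tag_array_size stands for the value the __cache_tag_array_size symbol
   resolves to.  A miss falls back to the __cache_fetch or
   __cache_fetch_dirty call emitted by ea_load_store.

     static void *
     lookup (struct __cache_tag_array *base, unsigned int tag_array_size,
             unsigned int ea)
     {
       struct __cache_tag_array *e = (struct __cache_tag_array *)
         ((char *) base + (ea & (tag_array_size - 128)));
       unsigned int tag = ea & -128;
       for (int way = 0; way < 4; way++)
         if (e->tag_lo[way] == tag)
           return (char *) e->data_pointer[way] + (ea & 127);
       return 0;
     }
 */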
4264 static void
4265 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4267 rtx ea_addr_si;
4268 HOST_WIDE_INT v;
4269 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4270 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4271 rtx index_mask = gen_reg_rtx (SImode);
4272 rtx tag_arr = gen_reg_rtx (Pmode);
4273 rtx splat_mask = gen_reg_rtx (TImode);
4274 rtx splat = gen_reg_rtx (V4SImode);
4275 rtx splat_hi = NULL_RTX;
4276 rtx tag_index = gen_reg_rtx (Pmode);
4277 rtx block_off = gen_reg_rtx (SImode);
4278 rtx tag_addr = gen_reg_rtx (Pmode);
4279 rtx tag = gen_reg_rtx (V4SImode);
4280 rtx cache_tag = gen_reg_rtx (V4SImode);
4281 rtx cache_tag_hi = NULL_RTX;
4282 rtx cache_ptrs = gen_reg_rtx (TImode);
4283 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4284 rtx tag_equal = gen_reg_rtx (V4SImode);
4285 rtx tag_equal_hi = NULL_RTX;
4286 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4287 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4288 rtx eq_index = gen_reg_rtx (SImode);
4289 rtx bcomp, hit_label, hit_ref, cont_label;
4290 rtx_insn *insn;
4292 if (spu_ea_model != 32)
4294 splat_hi = gen_reg_rtx (V4SImode);
4295 cache_tag_hi = gen_reg_rtx (V4SImode);
4296 tag_equal_hi = gen_reg_rtx (V4SImode);
4299 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4300 emit_move_insn (tag_arr, tag_arr_sym);
4301 v = 0x0001020300010203LL;
4302 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4303 ea_addr_si = ea_addr;
4304 if (spu_ea_model != 32)
4305 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4307 /* tag_index = ea_addr & (tag_array_size - 128) */
4308 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4310 /* splat ea_addr to all 4 slots. */
4311 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4312 /* Similarly for high 32 bits of ea_addr. */
4313 if (spu_ea_model != 32)
4314 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4316 /* block_off = ea_addr & 127 */
4317 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4319 /* tag_addr = tag_arr + tag_index */
4320 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4322 /* Read cache tags. */
4323 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4324 if (spu_ea_model != 32)
4325 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4326 plus_constant (Pmode,
4327 tag_addr, 16)));
4329 /* tag = ea_addr & -128 */
4330 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4332 /* Read all four cache data pointers. */
4333 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4334 plus_constant (Pmode,
4335 tag_addr, 32)));
4337 /* Compare tags. */
4338 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4339 if (spu_ea_model != 32)
4341 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4342 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4345 /* At most one of the tags compares equal, so tag_equal has one
4346 32-bit slot set to all 1's, with the other slots all zero.
4347 gbb picks off the low bit of each byte in the 128-bit register,
4348 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4349 we have a hit. */
4350 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4351 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4353 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4354 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4356 /* This allows us to rotate the corresponding cache data pointer into
4357 slot 0 (rotating by eq_index mod 16 bytes).
4358 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4359 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4361 /* Add block offset to form final data address. */
4362 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4364 /* Check that we did hit. */
4365 hit_label = gen_label_rtx ();
4366 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4367 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4368 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4369 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4370 hit_ref, pc_rtx)));
4371 /* Say that this branch is very likely to happen. */
4372 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4373 add_int_reg_note (insn, REG_BR_PROB, v);
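/* With REG_BR_PROB_BASE of 10000 this is 9899, i.e. just under a 99%
   predicted probability that the tag comparison hit.  */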
4375 ea_load_store (mem, is_store, ea_addr, data_addr);
4376 cont_label = gen_label_rtx ();
4377 emit_jump_insn (gen_jump (cont_label));
4378 emit_barrier ();
4380 emit_label (hit_label);
4382 if (is_store)
4384 HOST_WIDE_INT v_hi;
4385 rtx dirty_bits = gen_reg_rtx (TImode);
4386 rtx dirty_off = gen_reg_rtx (SImode);
4387 rtx dirty_128 = gen_reg_rtx (TImode);
4388 rtx neg_block_off = gen_reg_rtx (SImode);
4390 /* Set up mask with one dirty bit per byte of the mem we are
4391 writing, starting from top bit. */
4392 v_hi = v = -1;
4393 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4394 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4396 v_hi = v;
4397 v = 0;
4399 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4401 /* Form index into cache dirty_bits. eq_index is one of
4402 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4403 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4404 offset to each of the four dirty_bits elements. */
4405 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4407 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4409 /* Rotate bit mask to proper bit. */
4410 emit_insn (gen_negsi2 (neg_block_off, block_off));
4411 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4412 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4414 /* Or in the new dirty bits. */
4415 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4417 /* Store. */
4418 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4421 emit_label (cont_label);
4424 static rtx
4425 expand_ea_mem (rtx mem, bool is_store)
4427 rtx ea_addr;
4428 rtx data_addr = gen_reg_rtx (Pmode);
4429 rtx new_mem;
4431 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4432 if (optimize_size || optimize == 0)
4433 ea_load_store (mem, is_store, ea_addr, data_addr);
4434 else
4435 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4437 if (ea_alias_set == -1)
4438 ea_alias_set = new_alias_set ();
4440 /* We generate a new MEM RTX to refer to the copy of the data
4441 in the cache. We do not copy memory attributes (except the
4442 alignment) from the original MEM, as they may no longer apply
4443 to the cache copy. */
4444 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4445 set_mem_alias_set (new_mem, ea_alias_set);
4446 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4448 return new_mem;
4452 spu_expand_mov (rtx * ops, machine_mode mode)
4454 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4456 /* Perform the move in the destination SUBREG's inner mode. */
4457 ops[0] = SUBREG_REG (ops[0]);
4458 mode = GET_MODE (ops[0]);
4459 ops[1] = gen_lowpart_common (mode, ops[1]);
4460 gcc_assert (ops[1]);
4463 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4465 rtx from = SUBREG_REG (ops[1]);
4466 machine_mode imode = int_mode_for_mode (GET_MODE (from));
4468 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4469 && GET_MODE_CLASS (imode) == MODE_INT
4470 && subreg_lowpart_p (ops[1]));
4472 if (GET_MODE_SIZE (imode) < 4)
4473 imode = SImode;
4474 if (imode != GET_MODE (from))
4475 from = gen_rtx_SUBREG (imode, from, 0);
4477 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4479 enum insn_code icode = convert_optab_handler (trunc_optab,
4480 mode, imode);
4481 emit_insn (GEN_FCN (icode) (ops[0], from));
4483 else
4484 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4485 return 1;
4488 /* At least one of the operands needs to be a register. */
4489 if ((reload_in_progress | reload_completed) == 0
4490 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4492 rtx temp = force_reg (mode, ops[1]);
4493 emit_move_insn (ops[0], temp);
4494 return 1;
4496 if (reload_in_progress || reload_completed)
4498 if (CONSTANT_P (ops[1]))
4499 return spu_split_immediate (ops);
4500 return 0;
4503 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4504 extend them. */
4505 if (GET_CODE (ops[1]) == CONST_INT)
4507 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4508 if (val != INTVAL (ops[1]))
4510 emit_move_insn (ops[0], GEN_INT (val));
4511 return 1;
4514 if (MEM_P (ops[0]))
4516 if (MEM_ADDR_SPACE (ops[0]))
4517 ops[0] = expand_ea_mem (ops[0], true);
4518 return spu_split_store (ops);
4520 if (MEM_P (ops[1]))
4522 if (MEM_ADDR_SPACE (ops[1]))
4523 ops[1] = expand_ea_mem (ops[1], false);
4524 return spu_split_load (ops);
4527 return 0;
4530 static void
4531 spu_convert_move (rtx dst, rtx src)
4533 machine_mode mode = GET_MODE (dst);
4534 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4535 rtx reg;
4536 gcc_assert (GET_MODE (src) == TImode);
4537 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4538 emit_insn (gen_rtx_SET (reg,
4539 gen_rtx_TRUNCATE (int_mode,
4540 gen_rtx_LSHIFTRT (TImode, src,
4541 GEN_INT (int_mode == DImode ? 64 : 96)))));
4542 if (int_mode != mode)
4544 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4545 emit_move_insn (dst, reg);
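/* Illustrative note: the shift amount above (96, or 64 for DImode)
   selects the preferred slot; e.g. for an SImode DST the TImode source
   is shifted right by 96 bits so that bytes 0..3 of the quadword become
   the low 32 bits before truncation.  */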
4549 /* Load TImode values into DST0 and DST1 (when DST1 is non-NULL) using
4550 the address from SRC and SRC+16. Return a REG or CONST_INT that
4551 specifies how many bytes to rotate the loaded registers, plus any
4552 extra from EXTRA_ROTQBY. The address and rotate amounts are
4553 normalized to improve merging of loads and rotate computations. */
4554 static rtx
4555 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4557 rtx addr = XEXP (src, 0);
4558 rtx p0, p1, rot, addr0, addr1;
4559 int rot_amt;
4561 rot = 0;
4562 rot_amt = 0;
4564 if (MEM_ALIGN (src) >= 128)
4565 /* Address is already aligned; simply perform a TImode load. */ ;
4566 else if (GET_CODE (addr) == PLUS)
4568 /* 8 cases:
4569 aligned reg + aligned reg => lqx
4570 aligned reg + unaligned reg => lqx, rotqby
4571 aligned reg + aligned const => lqd
4572 aligned reg + unaligned const => lqd, rotqbyi
4573 unaligned reg + aligned reg => lqx, rotqby
4574 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4575 unaligned reg + aligned const => lqd, rotqby
4576 unaligned reg + unaligned const => not allowed by legitimate address
4578 p0 = XEXP (addr, 0);
4579 p1 = XEXP (addr, 1);
4580 if (!reg_aligned_for_addr (p0))
4582 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4584 rot = gen_reg_rtx (SImode);
4585 emit_insn (gen_addsi3 (rot, p0, p1));
4587 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4589 if (INTVAL (p1) > 0
4590 && REG_POINTER (p0)
4591 && INTVAL (p1) * BITS_PER_UNIT
4592 < REGNO_POINTER_ALIGN (REGNO (p0)))
4594 rot = gen_reg_rtx (SImode);
4595 emit_insn (gen_addsi3 (rot, p0, p1));
4596 addr = p0;
4598 else
4600 rtx x = gen_reg_rtx (SImode);
4601 emit_move_insn (x, p1);
4602 if (!spu_arith_operand (p1, SImode))
4603 p1 = x;
4604 rot = gen_reg_rtx (SImode);
4605 emit_insn (gen_addsi3 (rot, p0, p1));
4606 addr = gen_rtx_PLUS (Pmode, p0, x);
4609 else
4610 rot = p0;
4612 else
4614 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4616 rot_amt = INTVAL (p1) & 15;
4617 if (INTVAL (p1) & -16)
4619 p1 = GEN_INT (INTVAL (p1) & -16);
4620 addr = gen_rtx_PLUS (SImode, p0, p1);
4622 else
4623 addr = p0;
4625 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4626 rot = p1;
4629 else if (REG_P (addr))
4631 if (!reg_aligned_for_addr (addr))
4632 rot = addr;
4634 else if (GET_CODE (addr) == CONST)
4636 if (GET_CODE (XEXP (addr, 0)) == PLUS
4637 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4638 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4640 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4641 if (rot_amt & -16)
4642 addr = gen_rtx_CONST (Pmode,
4643 gen_rtx_PLUS (Pmode,
4644 XEXP (XEXP (addr, 0), 0),
4645 GEN_INT (rot_amt & -16)));
4646 else
4647 addr = XEXP (XEXP (addr, 0), 0);
4649 else
4651 rot = gen_reg_rtx (Pmode);
4652 emit_move_insn (rot, addr);
4655 else if (GET_CODE (addr) == CONST_INT)
4657 rot_amt = INTVAL (addr);
4658 addr = GEN_INT (rot_amt & -16);
4660 else if (!ALIGNED_SYMBOL_REF_P (addr))
4662 rot = gen_reg_rtx (Pmode);
4663 emit_move_insn (rot, addr);
4666 rot_amt += extra_rotby;
4668 rot_amt &= 15;
4670 if (rot && rot_amt)
4672 rtx x = gen_reg_rtx (SImode);
4673 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4674 rot = x;
4675 rot_amt = 0;
4677 if (!rot && rot_amt)
4678 rot = GEN_INT (rot_amt);
4680 addr0 = copy_rtx (addr);
4681 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4682 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4684 if (dst1)
4686 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4687 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4688 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4691 return rot;
4695 spu_split_load (rtx * ops)
4697 machine_mode mode = GET_MODE (ops[0]);
4698 rtx addr, load, rot;
4699 int rot_amt;
4701 if (GET_MODE_SIZE (mode) >= 16)
4702 return 0;
4704 addr = XEXP (ops[1], 0);
4705 gcc_assert (GET_CODE (addr) != AND);
4707 if (!address_needs_split (ops[1]))
4709 ops[1] = change_address (ops[1], TImode, addr);
4710 load = gen_reg_rtx (TImode);
4711 emit_insn (gen__movti (load, ops[1]));
4712 spu_convert_move (ops[0], load);
4713 return 1;
4716 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4718 load = gen_reg_rtx (TImode);
4719 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4721 if (rot)
4722 emit_insn (gen_rotqby_ti (load, load, rot));
4724 spu_convert_move (ops[0], load);
4725 return 1;
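/* Worked example (illustrative): an SImode load from an address A of
   unknown alignment, say A = 0x1008, becomes a quadword load from
   A & -16 (0x1000) followed by a rotqby whose count is A itself; the
   rotate left by A & 15 (8) bytes moves the addressed word into the
   preferred slot, where spu_convert_move picks it up.  For modes smaller
   than a word the extra rotate of size - 4 lines the value up with the
   low end of the preferred slot.  */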
4729 spu_split_store (rtx * ops)
4731 machine_mode mode = GET_MODE (ops[0]);
4732 rtx reg;
4733 rtx addr, p0, p1, p1_lo, smem;
4734 int aform;
4735 int scalar;
4737 if (GET_MODE_SIZE (mode) >= 16)
4738 return 0;
4740 addr = XEXP (ops[0], 0);
4741 gcc_assert (GET_CODE (addr) != AND);
4743 if (!address_needs_split (ops[0]))
4745 reg = gen_reg_rtx (TImode);
4746 emit_insn (gen_spu_convert (reg, ops[1]));
4747 ops[0] = change_address (ops[0], TImode, addr);
4748 emit_move_insn (ops[0], reg);
4749 return 1;
4752 if (GET_CODE (addr) == PLUS)
4754 /* 8 cases:
4755 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4756 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4757 aligned reg + aligned const => lqd, c?d, shuf, stqx
4758 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4759 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4760 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4761 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4762 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4764 aform = 0;
4765 p0 = XEXP (addr, 0);
4766 p1 = p1_lo = XEXP (addr, 1);
4767 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4769 p1_lo = GEN_INT (INTVAL (p1) & 15);
4770 if (reg_aligned_for_addr (p0))
4772 p1 = GEN_INT (INTVAL (p1) & -16);
4773 if (p1 == const0_rtx)
4774 addr = p0;
4775 else
4776 addr = gen_rtx_PLUS (SImode, p0, p1);
4778 else
4780 rtx x = gen_reg_rtx (SImode);
4781 emit_move_insn (x, p1);
4782 addr = gen_rtx_PLUS (SImode, p0, x);
4786 else if (REG_P (addr))
4788 aform = 0;
4789 p0 = addr;
4790 p1 = p1_lo = const0_rtx;
4792 else
4794 aform = 1;
4795 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4796 p1 = 0; /* aform doesn't use p1 */
4797 p1_lo = addr;
4798 if (ALIGNED_SYMBOL_REF_P (addr))
4799 p1_lo = const0_rtx;
4800 else if (GET_CODE (addr) == CONST
4801 && GET_CODE (XEXP (addr, 0)) == PLUS
4802 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4803 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4805 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4806 if ((v & -16) != 0)
4807 addr = gen_rtx_CONST (Pmode,
4808 gen_rtx_PLUS (Pmode,
4809 XEXP (XEXP (addr, 0), 0),
4810 GEN_INT (v & -16)));
4811 else
4812 addr = XEXP (XEXP (addr, 0), 0);
4813 p1_lo = GEN_INT (v & 15);
4815 else if (GET_CODE (addr) == CONST_INT)
4817 p1_lo = GEN_INT (INTVAL (addr) & 15);
4818 addr = GEN_INT (INTVAL (addr) & -16);
4820 else
4822 p1_lo = gen_reg_rtx (SImode);
4823 emit_move_insn (p1_lo, addr);
4827 gcc_assert (aform == 0 || aform == 1);
4828 reg = gen_reg_rtx (TImode);
4830 scalar = store_with_one_insn_p (ops[0]);
4831 if (!scalar)
4833 /* We could copy the flags from the ops[0] MEM to lmem here.
4834 We don't because we want this load to be optimized away if
4835 possible, and copying the flags will prevent that in certain
4836 cases, e.g. consider the volatile flag. */
4838 rtx pat = gen_reg_rtx (TImode);
4839 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4840 set_mem_alias_set (lmem, 0);
4841 emit_insn (gen_movti (reg, lmem));
4843 if (!p0 || reg_aligned_for_addr (p0))
4844 p0 = stack_pointer_rtx;
4845 if (!p1_lo)
4846 p1_lo = const0_rtx;
4848 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4849 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4851 else
4853 if (GET_CODE (ops[1]) == REG)
4854 emit_insn (gen_spu_convert (reg, ops[1]));
4855 else if (GET_CODE (ops[1]) == SUBREG)
4856 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4857 else
4858 abort ();
4861 if (GET_MODE_SIZE (mode) < 4 && scalar)
4862 emit_insn (gen_ashlti3
4863 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4865 smem = change_address (ops[0], TImode, copy_rtx (addr));
4866 /* We can't use the previous alias set because the memory has changed
4867 size and can potentially overlap objects of other types. */
4868 set_mem_alias_set (smem, 0);
4870 emit_insn (gen_movti (smem, reg));
4871 return 1;
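/* Worked example (illustrative): a 4-byte store into the middle of a
   quadword becomes a read-modify-write: load the containing quadword,
   build an insertion mask with cpat (cwd for a word), shufb the new
   value into the right byte positions, and store the whole quadword
   back, matching the "lqd, c?d, shuf, stqx" sequences listed above.  */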
4874 /* Return TRUE if X is a MEM which is a struct member reference
4875 and the member can safely be loaded and stored with a single
4876 instruction because it is padded. */
4877 static int
4878 mem_is_padded_component_ref (rtx x)
4880 tree t = MEM_EXPR (x);
4881 tree r;
4882 if (!t || TREE_CODE (t) != COMPONENT_REF)
4883 return 0;
4884 t = TREE_OPERAND (t, 1);
4885 if (!t || TREE_CODE (t) != FIELD_DECL
4886 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4887 return 0;
4888 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4889 r = DECL_FIELD_CONTEXT (t);
4890 if (!r || TREE_CODE (r) != RECORD_TYPE)
4891 return 0;
4892 /* Make sure they are the same mode */
4893 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4894 return 0;
4895 /* If there are no following fields then the field alignment assures
4896 the structure is padded to the alignment which means this field is
4897 padded too. */
4898 if (TREE_CHAIN (t) == 0)
4899 return 1;
4900 /* If the following field is also aligned then this field will be
4901 padded. */
4902 t = TREE_CHAIN (t);
4903 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4904 return 1;
4905 return 0;
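/* Illustrative example of a mem that qualifies:

     struct s {
       int a __attribute__ ((aligned (16)));
       int b __attribute__ ((aligned (16)));
     };

   a's slot is padded out to 16 bytes by b's alignment, so a quadword
   store to a cannot clobber b.  */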
4908 /* Parse the -mfixed-range= option string. */
4909 static void
4910 fix_range (const char *const_str)
4912 int i, first, last;
4913 char *str, *dash, *comma;
4915 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4916 REG2 are either register names or register numbers. The effect
4917 of this option is to mark the registers in the range from REG1 to
4918 REG2 as ``fixed'' so they won't be used by the compiler. */
4920 i = strlen (const_str);
4921 str = (char *) alloca (i + 1);
4922 memcpy (str, const_str, i + 1);
4924 while (1)
4926 dash = strchr (str, '-');
4927 if (!dash)
4929 warning (0, "value of -mfixed-range must have form REG1-REG2");
4930 return;
4932 *dash = '\0';
4933 comma = strchr (dash + 1, ',');
4934 if (comma)
4935 *comma = '\0';
4937 first = decode_reg_name (str);
4938 if (first < 0)
4940 warning (0, "unknown register name: %s", str);
4941 return;
4944 last = decode_reg_name (dash + 1);
4945 if (last < 0)
4947 warning (0, "unknown register name: %s", dash + 1);
4948 return;
4951 *dash = '-';
4953 if (first > last)
4955 warning (0, "%s-%s is an empty range", str, dash + 1);
4956 return;
4959 for (i = first; i <= last; ++i)
4960 fixed_regs[i] = call_used_regs[i] = 1;
4962 if (!comma)
4963 break;
4965 *comma = ',';
4966 str = comma + 1;
4970 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4971 can be generated using the fsmbi instruction. */
4973 fsmbi_const_p (rtx x)
4975 if (CONSTANT_P (x))
4977 /* We can always choose TImode for CONST_INT because the high bits
4978 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4979 enum immediate_class c = classify_immediate (x, TImode);
4980 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4982 return 0;
4985 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4986 can be generated using the cbd, chd, cwd or cdd instruction. */
4988 cpat_const_p (rtx x, machine_mode mode)
4990 if (CONSTANT_P (x))
4992 enum immediate_class c = classify_immediate (x, mode);
4993 return c == IC_CPAT;
4995 return 0;
4999 gen_cpat_const (rtx * ops)
5001 unsigned char dst[16];
5002 int i, offset, shift, isize;
5003 if (GET_CODE (ops[3]) != CONST_INT
5004 || GET_CODE (ops[2]) != CONST_INT
5005 || (GET_CODE (ops[1]) != CONST_INT
5006 && GET_CODE (ops[1]) != REG))
5007 return 0;
5008 if (GET_CODE (ops[1]) == REG
5009 && (!REG_POINTER (ops[1])
5010 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5011 return 0;
5013 for (i = 0; i < 16; i++)
5014 dst[i] = i + 16;
5015 isize = INTVAL (ops[3]);
5016 if (isize == 1)
5017 shift = 3;
5018 else if (isize == 2)
5019 shift = 2;
5020 else
5021 shift = 0;
5022 offset = (INTVAL (ops[2]) +
5023 (GET_CODE (ops[1]) ==
5024 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5025 for (i = 0; i < isize; i++)
5026 dst[offset + i] = i + shift;
5027 return array_to_constant (TImode, dst);
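/* Worked example (illustrative): for an aligned base, offset 8 and size
   4 the generated selector is

     { 16,17,18,19, 20,21,22,23,  0, 1, 2, 3, 28,29,30,31 }

   i.e. bytes 8..11 of the result are taken from the preferred slot of
   the value being inserted, and every other byte is copied unchanged
   from the old quadword (selector values 16..31 pick from the second
   shufb operand).  */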
5030 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5031 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5032 than 16 bytes, the value is repeated across the rest of the array. */
5033 void
5034 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5036 HOST_WIDE_INT val;
5037 int i, j, first;
5039 memset (arr, 0, 16);
5040 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5041 if (GET_CODE (x) == CONST_INT
5042 || (GET_CODE (x) == CONST_DOUBLE
5043 && (mode == SFmode || mode == DFmode)))
5045 gcc_assert (mode != VOIDmode && mode != BLKmode);
5047 if (GET_CODE (x) == CONST_DOUBLE)
5048 val = const_double_to_hwint (x);
5049 else
5050 val = INTVAL (x);
5051 first = GET_MODE_SIZE (mode) - 1;
5052 for (i = first; i >= 0; i--)
5054 arr[i] = val & 0xff;
5055 val >>= 8;
5057 /* Splat the constant across the whole array. */
5058 for (j = 0, i = first + 1; i < 16; i++)
5060 arr[i] = arr[j];
5061 j = (j == first) ? 0 : j + 1;
5064 else if (GET_CODE (x) == CONST_DOUBLE)
5066 val = CONST_DOUBLE_LOW (x);
5067 for (i = 15; i >= 8; i--)
5069 arr[i] = val & 0xff;
5070 val >>= 8;
5072 val = CONST_DOUBLE_HIGH (x);
5073 for (i = 7; i >= 0; i--)
5075 arr[i] = val & 0xff;
5076 val >>= 8;
5079 else if (GET_CODE (x) == CONST_VECTOR)
5081 int units;
5082 rtx elt;
5083 mode = GET_MODE_INNER (mode);
5084 units = CONST_VECTOR_NUNITS (x);
5085 for (i = 0; i < units; i++)
5087 elt = CONST_VECTOR_ELT (x, i);
5088 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5090 if (GET_CODE (elt) == CONST_DOUBLE)
5091 val = const_double_to_hwint (elt);
5092 else
5093 val = INTVAL (elt);
5094 first = GET_MODE_SIZE (mode) - 1;
5095 if (first + i * GET_MODE_SIZE (mode) > 16)
5096 abort ();
5097 for (j = first; j >= 0; j--)
5099 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5100 val >>= 8;
5105 else
5106 gcc_unreachable();
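/* Worked example (illustrative): constant_to_array (SImode,
   GEN_INT (0x12345678), arr) stores the big-endian bytes 12 34 56 78
   into arr[0..3] and then repeats the pattern, so arr[4..7], arr[8..11]
   and arr[12..15] hold the same four bytes.  */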
5109 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5110 smaller than 16 bytes, use the bytes that would represent that value
5111 in a register, e.g., for QImode return the value of arr[3]. */
5113 array_to_constant (machine_mode mode, const unsigned char arr[16])
5115 machine_mode inner_mode;
5116 rtvec v;
5117 int units, size, i, j, k;
5118 HOST_WIDE_INT val;
5120 if (GET_MODE_CLASS (mode) == MODE_INT
5121 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5123 j = GET_MODE_SIZE (mode);
5124 i = j < 4 ? 4 - j : 0;
5125 for (val = 0; i < j; i++)
5126 val = (val << 8) | arr[i];
5127 val = trunc_int_for_mode (val, mode);
5128 return GEN_INT (val);
5131 if (mode == TImode)
5133 HOST_WIDE_INT high;
5134 for (i = high = 0; i < 8; i++)
5135 high = (high << 8) | arr[i];
5136 for (i = 8, val = 0; i < 16; i++)
5137 val = (val << 8) | arr[i];
5138 return immed_double_const (val, high, TImode);
5140 if (mode == SFmode)
5142 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5143 val = trunc_int_for_mode (val, SImode);
5144 return hwint_to_const_double (SFmode, val);
5146 if (mode == DFmode)
5148 for (i = 0, val = 0; i < 8; i++)
5149 val = (val << 8) | arr[i];
5150 return hwint_to_const_double (DFmode, val);
5153 if (!VECTOR_MODE_P (mode))
5154 abort ();
5156 units = GET_MODE_NUNITS (mode);
5157 size = GET_MODE_UNIT_SIZE (mode);
5158 inner_mode = GET_MODE_INNER (mode);
5159 v = rtvec_alloc (units);
5161 for (k = i = 0; i < units; ++i)
5163 val = 0;
5164 for (j = 0; j < size; j++, k++)
5165 val = (val << 8) | arr[k];
5167 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5168 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5169 else
5170 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5172 if (k > 16)
5173 abort ();
5175 return gen_rtx_CONST_VECTOR (mode, v);
5178 static void
5179 reloc_diagnostic (rtx x)
5181 tree decl = 0;
5182 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5183 return;
5185 if (GET_CODE (x) == SYMBOL_REF)
5186 decl = SYMBOL_REF_DECL (x);
5187 else if (GET_CODE (x) == CONST
5188 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5189 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5191 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5192 if (decl && !DECL_P (decl))
5193 decl = 0;
5195 /* The decl could be a string constant. */
5196 if (decl && DECL_P (decl))
5198 location_t loc;
5199 /* We use last_assemble_variable_decl to get line information. It's
5200 not always going to be right and might not even be close, but will
5201 be right for the more common cases. */
5202 if (!last_assemble_variable_decl || in_section == ctors_section)
5203 loc = DECL_SOURCE_LOCATION (decl);
5204 else
5205 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5207 if (TARGET_WARN_RELOC)
5208 warning_at (loc, 0,
5209 "creating run-time relocation for %qD", decl);
5210 else
5211 error_at (loc,
5212 "creating run-time relocation for %qD", decl);
5214 else
5216 if (TARGET_WARN_RELOC)
5217 warning_at (input_location, 0, "creating run-time relocation");
5218 else
5219 error_at (input_location, "creating run-time relocation");
5223 /* Hook into assemble_integer so we can generate an error for run-time
5224 relocations. The SPU ABI disallows them. */
5225 static bool
5226 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5228 /* By default run-time relocations aren't supported, but we allow them
5229 in case users support them in their own run-time loader. And we provide
5230 a warning for those users that don't. */
5231 if ((GET_CODE (x) == SYMBOL_REF)
5232 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5233 reloc_diagnostic (x);
5235 return default_assemble_integer (x, size, aligned_p);
5238 static void
5239 spu_asm_globalize_label (FILE * file, const char *name)
5241 fputs ("\t.global\t", file);
5242 assemble_name (file, name);
5243 fputs ("\n", file);
5246 static bool
5247 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5248 int opno ATTRIBUTE_UNUSED, int *total,
5249 bool speed ATTRIBUTE_UNUSED)
5251 machine_mode mode = GET_MODE (x);
5252 int cost = COSTS_N_INSNS (2);
5254 /* Folding to a CONST_VECTOR will use extra space but there might
5255 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5256 only if it allows us to fold away multiple insns. Changing the cost
5257 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5258 because this cost will only be compared against a single insn.
5259 if (code == CONST_VECTOR)
5260 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5263 /* Use defaults for float operations. Not accurate but good enough. */
5264 if (mode == DFmode)
5266 *total = COSTS_N_INSNS (13);
5267 return true;
5269 if (mode == SFmode)
5271 *total = COSTS_N_INSNS (6);
5272 return true;
5274 switch (code)
5276 case CONST_INT:
5277 if (satisfies_constraint_K (x))
5278 *total = 0;
5279 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5280 *total = COSTS_N_INSNS (1);
5281 else
5282 *total = COSTS_N_INSNS (3);
5283 return true;
5285 case CONST:
5286 *total = COSTS_N_INSNS (3);
5287 return true;
5289 case LABEL_REF:
5290 case SYMBOL_REF:
5291 *total = COSTS_N_INSNS (0);
5292 return true;
5294 case CONST_DOUBLE:
5295 *total = COSTS_N_INSNS (5);
5296 return true;
5298 case FLOAT_EXTEND:
5299 case FLOAT_TRUNCATE:
5300 case FLOAT:
5301 case UNSIGNED_FLOAT:
5302 case FIX:
5303 case UNSIGNED_FIX:
5304 *total = COSTS_N_INSNS (7);
5305 return true;
5307 case PLUS:
5308 if (mode == TImode)
5310 *total = COSTS_N_INSNS (9);
5311 return true;
5313 break;
5315 case MULT:
5316 cost =
5317 GET_CODE (XEXP (x, 0)) ==
5318 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5319 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5321 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5323 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5324 cost = COSTS_N_INSNS (14);
5325 if ((val & 0xffff) == 0)
5326 cost = COSTS_N_INSNS (9);
5327 else if (val > 0 && val < 0x10000)
5328 cost = COSTS_N_INSNS (11);
5331 *total = cost;
5332 return true;
5333 case DIV:
5334 case UDIV:
5335 case MOD:
5336 case UMOD:
5337 *total = COSTS_N_INSNS (20);
5338 return true;
5339 case ROTATE:
5340 case ROTATERT:
5341 case ASHIFT:
5342 case ASHIFTRT:
5343 case LSHIFTRT:
5344 *total = COSTS_N_INSNS (4);
5345 return true;
5346 case UNSPEC:
5347 if (XINT (x, 1) == UNSPEC_CONVERT)
5348 *total = COSTS_N_INSNS (0);
5349 else
5350 *total = COSTS_N_INSNS (4);
5351 return true;
5353 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5354 if (GET_MODE_CLASS (mode) == MODE_INT
5355 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5356 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5357 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5358 *total = cost;
5359 return true;
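/* Illustrative note on the scaling above: the cost is multiplied by the
   square of the mode-size ratio relative to SImode, so a DImode
   operation is costed at 4x and a TImode operation at 16x the base
   cost.  */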
5362 static machine_mode
5363 spu_unwind_word_mode (void)
5365 return SImode;
5368 /* Decide whether we can make a sibling call to a function. DECL is the
5369 declaration of the function being targeted by the call and EXP is the
5370 CALL_EXPR representing the call. */
5371 static bool
5372 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5374 return decl && !TARGET_LARGE_MEM;
5377 /* We need to correctly update the back chain pointer and the Available
5378 Stack Size (which is in the second slot of the sp register). */
5379 void
5380 spu_allocate_stack (rtx op0, rtx op1)
5382 HOST_WIDE_INT v;
5383 rtx chain = gen_reg_rtx (V4SImode);
5384 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5385 rtx sp = gen_reg_rtx (V4SImode);
5386 rtx splatted = gen_reg_rtx (V4SImode);
5387 rtx pat = gen_reg_rtx (TImode);
5389 /* copy the back chain so we can save it back again. */
5390 emit_move_insn (chain, stack_bot);
5392 op1 = force_reg (SImode, op1);
5394 v = 0x1020300010203ll;
5395 emit_move_insn (pat, immed_double_const (v, v, TImode));
5396 emit_insn (gen_shufb (splatted, op1, op1, pat));
5398 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5399 emit_insn (gen_subv4si3 (sp, sp, splatted));
5401 if (flag_stack_check)
5403 rtx avail = gen_reg_rtx(SImode);
5404 rtx result = gen_reg_rtx(SImode);
5405 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5406 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5407 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5410 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5412 emit_move_insn (stack_bot, chain);
5414 emit_move_insn (op0, virtual_stack_dynamic_rtx);
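/* Roughly what the sequence above computes (sketch only, not literal
   code):

     new_sp    = old_sp    - size;        (word 0 of $sp)
     new_avail = old_avail - size;        (word 1, the Available Stack Size)
     *(quadword at new_sp) = saved back-chain quadword;

   OP0 is then set from virtual_stack_dynamic_rtx.  */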
5417 void
5418 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5420 static unsigned char arr[16] =
5421 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5422 rtx temp = gen_reg_rtx (SImode);
5423 rtx temp2 = gen_reg_rtx (SImode);
5424 rtx temp3 = gen_reg_rtx (V4SImode);
5425 rtx temp4 = gen_reg_rtx (V4SImode);
5426 rtx pat = gen_reg_rtx (TImode);
5427 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5429 /* Restore the backchain from the first word, sp from the second. */
5430 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5431 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5433 emit_move_insn (pat, array_to_constant (TImode, arr));
5435 /* Compute Available Stack Size for sp */
5436 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5437 emit_insn (gen_shufb (temp3, temp, temp, pat));
5439 /* Compute Available Stack Size for back chain */
5440 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5441 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5442 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5444 emit_insn (gen_addv4si3 (sp, sp, temp3));
5445 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5448 static void
5449 spu_init_libfuncs (void)
5451 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5452 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5453 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5454 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5455 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5456 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5457 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5458 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5459 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5460 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5461 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5462 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5464 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5465 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5467 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5468 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5469 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5470 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5471 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5472 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5473 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5474 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5475 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5476 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5477 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5478 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5480 set_optab_libfunc (smul_optab, TImode, "__multi3");
5481 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5482 set_optab_libfunc (smod_optab, TImode, "__modti3");
5483 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5484 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5485 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5488 /* Make a subreg, stripping any existing subreg. We could possibly just
5489 call simplify_subreg, but in this case we know what we want. */
5491 spu_gen_subreg (machine_mode mode, rtx x)
5493 if (GET_CODE (x) == SUBREG)
5494 x = SUBREG_REG (x);
5495 if (GET_MODE (x) == mode)
5496 return x;
5497 return gen_rtx_SUBREG (mode, x, 0);
5500 static bool
5501 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5503 return (TYPE_MODE (type) == BLKmode
5504 && ((type) == 0
5505 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5506 || int_size_in_bytes (type) >
5507 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5510 /* Create the built-in types and functions */
5512 enum spu_function_code
5514 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5515 #include "spu-builtins.def"
5516 #undef DEF_BUILTIN
5517 NUM_SPU_BUILTINS
5520 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5522 struct spu_builtin_description spu_builtins[] = {
5523 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5524 {fcode, icode, name, type, params},
5525 #include "spu-builtins.def"
5526 #undef DEF_BUILTIN
5529 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5531 /* Returns the spu builtin decl for CODE. */
5533 static tree
5534 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5536 if (code >= NUM_SPU_BUILTINS)
5537 return error_mark_node;
5539 return spu_builtin_decls[code];
5543 static void
5544 spu_init_builtins (void)
5546 struct spu_builtin_description *d;
5547 unsigned int i;
5549 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5550 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5551 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5552 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5553 V4SF_type_node = build_vector_type (float_type_node, 4);
5554 V2DF_type_node = build_vector_type (double_type_node, 2);
5556 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5557 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5558 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5559 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5561 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5563 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5564 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5566 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5567 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5568 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5569 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5570 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5571 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5572 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5573 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5574 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5576 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5577 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5578 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5579 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5580 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5581 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5582 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5583 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5585 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5586 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5588 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5590 spu_builtin_types[SPU_BTI_PTR] =
5591 build_pointer_type (build_qualified_type
5592 (void_type_node,
5593 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5595 /* For each builtin we build a new prototype. The tree code will make
5596 sure nodes are shared. */
5597 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5599 tree p;
5600 char name[64]; /* build_function will make a copy. */
5601 int parm;
5603 if (d->name == 0)
5604 continue;
5606 /* Find last parm. */
5607 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5610 p = void_list_node;
5611 while (parm > 1)
5612 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5614 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5616 sprintf (name, "__builtin_%s", d->name);
5617 spu_builtin_decls[i] =
5618 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5619 if (d->fcode == SPU_MASK_FOR_LOAD)
5620 TREE_READONLY (spu_builtin_decls[i]) = 1;
5622 /* These builtins don't throw. */
5623 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5627 void
5628 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5630 static unsigned char arr[16] =
5631 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5633 rtx temp = gen_reg_rtx (Pmode);
5634 rtx temp2 = gen_reg_rtx (V4SImode);
5635 rtx temp3 = gen_reg_rtx (V4SImode);
5636 rtx pat = gen_reg_rtx (TImode);
5637 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5639 emit_move_insn (pat, array_to_constant (TImode, arr));
5641 /* Restore the sp. */
5642 emit_move_insn (temp, op1);
5643 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5645 /* Compute available stack size for sp. */
5646 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5647 emit_insn (gen_shufb (temp3, temp, temp, pat));
5649 emit_insn (gen_addv4si3 (sp, sp, temp3));
5650 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5654 spu_safe_dma (HOST_WIDE_INT channel)
5656 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5659 void
5660 spu_builtin_splats (rtx ops[])
5662 machine_mode mode = GET_MODE (ops[0]);
5663 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5665 unsigned char arr[16];
5666 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5667 emit_move_insn (ops[0], array_to_constant (mode, arr));
5669 else
5671 rtx reg = gen_reg_rtx (TImode);
5672 rtx shuf;
5673 if (GET_CODE (ops[1]) != REG
5674 && GET_CODE (ops[1]) != SUBREG)
5675 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5676 switch (mode)
5678 case V2DImode:
5679 case V2DFmode:
5680 shuf =
5681 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5682 TImode);
5683 break;
5684 case V4SImode:
5685 case V4SFmode:
5686 shuf =
5687 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5688 TImode);
5689 break;
5690 case V8HImode:
5691 shuf =
5692 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5693 TImode);
5694 break;
5695 case V16QImode:
5696 shuf =
5697 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5698 TImode);
5699 break;
5700 default:
5701 abort ();
5703 emit_move_insn (reg, shuf);
5704 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
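/* Illustrative note: the shuffle patterns above replicate the preferred
   slot of ops[1] across the whole register; e.g. for V4SImode the
   selector 00 01 02 03 repeated four times copies bytes 0..3 of ops[1]
   into every word slot.  */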
5708 void
5709 spu_builtin_extract (rtx ops[])
5711 machine_mode mode;
5712 rtx rot, from, tmp;
5714 mode = GET_MODE (ops[1]);
5716 if (GET_CODE (ops[2]) == CONST_INT)
5718 switch (mode)
5720 case V16QImode:
5721 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5722 break;
5723 case V8HImode:
5724 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5725 break;
5726 case V4SFmode:
5727 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5728 break;
5729 case V4SImode:
5730 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5731 break;
5732 case V2DImode:
5733 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5734 break;
5735 case V2DFmode:
5736 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5737 break;
5738 default:
5739 abort ();
5741 return;
5744 from = spu_gen_subreg (TImode, ops[1]);
5745 rot = gen_reg_rtx (TImode);
5746 tmp = gen_reg_rtx (SImode);
5748 switch (mode)
5750 case V16QImode:
5751 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5752 break;
5753 case V8HImode:
5754 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5755 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5756 break;
5757 case V4SFmode:
5758 case V4SImode:
5759 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5760 break;
5761 case V2DImode:
5762 case V2DFmode:
5763 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5764 break;
5765 default:
5766 abort ();
5768 emit_insn (gen_rotqby_ti (rot, from, tmp));
5770 emit_insn (gen_spu_convert (ops[0], rot));
5773 void
5774 spu_builtin_insert (rtx ops[])
5776 machine_mode mode = GET_MODE (ops[0]);
5777 machine_mode imode = GET_MODE_INNER (mode);
5778 rtx mask = gen_reg_rtx (TImode);
5779 rtx offset;
5781 if (GET_CODE (ops[3]) == CONST_INT)
5782 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5783 else
5785 offset = gen_reg_rtx (SImode);
5786 emit_insn (gen_mulsi3
5787 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5789 emit_insn (gen_cpat
5790 (mask, stack_pointer_rtx, offset,
5791 GEN_INT (GET_MODE_SIZE (imode))));
5792 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5795 void
5796 spu_builtin_promote (rtx ops[])
5798 machine_mode mode, imode;
5799 rtx rot, from, offset;
5800 HOST_WIDE_INT pos;
5802 mode = GET_MODE (ops[0]);
5803 imode = GET_MODE_INNER (mode);
5805 from = gen_reg_rtx (TImode);
5806 rot = spu_gen_subreg (TImode, ops[0]);
5808 emit_insn (gen_spu_convert (from, ops[1]));
5810 if (GET_CODE (ops[2]) == CONST_INT)
5812 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5813 if (GET_MODE_SIZE (imode) < 4)
5814 pos += 4 - GET_MODE_SIZE (imode);
5815 offset = GEN_INT (pos & 15);
5817 else
5819 offset = gen_reg_rtx (SImode);
5820 switch (mode)
5822 case V16QImode:
5823 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5824 break;
5825 case V8HImode:
5826 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5827 emit_insn (gen_addsi3 (offset, offset, offset));
5828 break;
5829 case V4SFmode:
5830 case V4SImode:
5831 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5832 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5833 break;
5834 case V2DImode:
5835 case V2DFmode:
5836 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5837 break;
5838 default:
5839 abort ();
5842 emit_insn (gen_rotqby_ti (rot, from, offset));
5845 static void
5846 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5848 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5849 rtx shuf = gen_reg_rtx (V4SImode);
5850 rtx insn = gen_reg_rtx (V4SImode);
5851 rtx shufc;
5852 rtx insnc;
5853 rtx mem;
5855 fnaddr = force_reg (SImode, fnaddr);
5856 cxt = force_reg (SImode, cxt);
5858 if (TARGET_LARGE_MEM)
5860 rtx rotl = gen_reg_rtx (V4SImode);
5861 rtx mask = gen_reg_rtx (V4SImode);
5862 rtx bi = gen_reg_rtx (SImode);
5863 static unsigned char const shufa[16] = {
5864 2, 3, 0, 1, 18, 19, 16, 17,
5865 0, 1, 2, 3, 16, 17, 18, 19
5867 static unsigned char const insna[16] = {
5868 0x41, 0, 0, 79,
5869 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5870 0x60, 0x80, 0, 79,
5871 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5874 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5875 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5877 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5878 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5879 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5880 emit_insn (gen_selb (insn, insnc, rotl, mask));
5882 mem = adjust_address (m_tramp, V4SImode, 0);
5883 emit_move_insn (mem, insn);
5885 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5886 mem = adjust_address (m_tramp, Pmode, 16);
5887 emit_move_insn (mem, bi);
5889 else
5891 rtx scxt = gen_reg_rtx (SImode);
5892 rtx sfnaddr = gen_reg_rtx (SImode);
5893 static unsigned char const insna[16] = {
5894 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5895 0x30, 0, 0, 0,
5896 0, 0, 0, 0,
5897 0, 0, 0, 0
5900 shufc = gen_reg_rtx (TImode);
5901 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5903 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5904 fits 18 bits and the last 4 are zeros. This will be true if
5905 the stack pointer is initialized to 0x3fff0 at program start;
5906 otherwise the ila instruction will be garbage. */
5908 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5909 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5910 emit_insn (gen_cpat
5911 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5912 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5913 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5915 mem = adjust_address (m_tramp, V4SImode, 0);
5916 emit_move_insn (mem, insn);
5918 emit_insn (gen_sync ());
5921 static bool
5922 spu_warn_func_return (tree decl)
5924 /* Naked functions are implemented entirely in assembly, including the
5925 return sequence, so suppress warnings about this. */
5926 return !spu_naked_function_p (decl);
5929 void
5930 spu_expand_sign_extend (rtx ops[])
5932 unsigned char arr[16];
5933 rtx pat = gen_reg_rtx (TImode);
5934 rtx sign, c;
5935 int i, last;
5936 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5937 if (GET_MODE (ops[1]) == QImode)
5939 sign = gen_reg_rtx (HImode);
5940 emit_insn (gen_extendqihi2 (sign, ops[1]));
5941 for (i = 0; i < 16; i++)
5942 arr[i] = 0x12;
5943 arr[last] = 0x13;
5945 else
5947 for (i = 0; i < 16; i++)
5948 arr[i] = 0x10;
5949 switch (GET_MODE (ops[1]))
5951 case HImode:
5952 sign = gen_reg_rtx (SImode);
5953 emit_insn (gen_extendhisi2 (sign, ops[1]));
5954 arr[last] = 0x03;
5955 arr[last - 1] = 0x02;
5956 break;
5957 case SImode:
5958 sign = gen_reg_rtx (SImode);
5959 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5960 for (i = 0; i < 4; i++)
5961 arr[last - i] = 3 - i;
5962 break;
5963 case DImode:
5964 sign = gen_reg_rtx (SImode);
5965 c = gen_reg_rtx (SImode);
5966 emit_insn (gen_spu_convert (c, ops[1]));
5967 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5968 for (i = 0; i < 8; i++)
5969 arr[last - i] = 7 - i;
5970 break;
5971 default:
5972 abort ();
5975 emit_move_insn (pat, array_to_constant (TImode, arr));
5976 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
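/* Worked example (illustrative): sign-extending an SImode value to
   TImode uses sign = ops[1] >> 31 and the shuffle pattern

     { 0x10 repeated twelve times, then 0x00, 0x01, 0x02, 0x03 }

   so the upper twelve result bytes are copies of the sign byte and the
   low four bytes are the original value.  */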
5979 /* expand vector initialization. If there are any constant parts,
5980 load constant parts first. Then load any non-constant parts. */
5981 void
5982 spu_expand_vector_init (rtx target, rtx vals)
5984 machine_mode mode = GET_MODE (target);
5985 int n_elts = GET_MODE_NUNITS (mode);
5986 int n_var = 0;
5987 bool all_same = true;
5988 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5989 int i;
5991 first = XVECEXP (vals, 0, 0);
5992 for (i = 0; i < n_elts; ++i)
5994 x = XVECEXP (vals, 0, i);
5995 if (!(CONST_INT_P (x)
5996 || GET_CODE (x) == CONST_DOUBLE
5997 || GET_CODE (x) == CONST_FIXED))
5998 ++n_var;
5999 else
6001 if (first_constant == NULL_RTX)
6002 first_constant = x;
6004 if (i > 0 && !rtx_equal_p (x, first))
6005 all_same = false;
6008 /* if all elements are the same, use splats to repeat elements */
6009 if (all_same)
6011 if (!CONSTANT_P (first)
6012 && !register_operand (first, GET_MODE (x)))
6013 first = force_reg (GET_MODE (first), first);
6014 emit_insn (gen_spu_splats (target, first));
6015 return;
6018 /* load constant parts */
6019 if (n_var != n_elts)
6021 if (n_var == 0)
6023 emit_move_insn (target,
6024 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6026 else
6028 rtx constant_parts_rtx = copy_rtx (vals);
6030 gcc_assert (first_constant != NULL_RTX);
6031 /* fill empty slots with the first constant; this increases
6032 our chance of using splats in the recursive call below. */
6033 for (i = 0; i < n_elts; ++i)
6035 x = XVECEXP (constant_parts_rtx, 0, i);
6036 if (!(CONST_INT_P (x)
6037 || GET_CODE (x) == CONST_DOUBLE
6038 || GET_CODE (x) == CONST_FIXED))
6039 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6042 spu_expand_vector_init (target, constant_parts_rtx);
6046 /* load variable parts */
6047 if (n_var != 0)
6049 rtx insert_operands[4];
6051 insert_operands[0] = target;
6052 insert_operands[2] = target;
6053 for (i = 0; i < n_elts; ++i)
6055 x = XVECEXP (vals, 0, i);
6056 if (!(CONST_INT_P (x)
6057 || GET_CODE (x) == CONST_DOUBLE
6058 || GET_CODE (x) == CONST_FIXED))
6060 if (!register_operand (x, GET_MODE (x)))
6061 x = force_reg (GET_MODE (x), x);
6062 insert_operands[1] = x;
6063 insert_operands[3] = GEN_INT (i);
6064 spu_builtin_insert (insert_operands);
6070 /* Return the insn index of the vector compare instruction for the given
6071 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6073 static int
6074 get_vec_cmp_insn (enum rtx_code code,
6075 machine_mode dest_mode,
6076 machine_mode op_mode)
6079 switch (code)
6081 case EQ:
6082 if (dest_mode == V16QImode && op_mode == V16QImode)
6083 return CODE_FOR_ceq_v16qi;
6084 if (dest_mode == V8HImode && op_mode == V8HImode)
6085 return CODE_FOR_ceq_v8hi;
6086 if (dest_mode == V4SImode && op_mode == V4SImode)
6087 return CODE_FOR_ceq_v4si;
6088 if (dest_mode == V4SImode && op_mode == V4SFmode)
6089 return CODE_FOR_ceq_v4sf;
6090 if (dest_mode == V2DImode && op_mode == V2DFmode)
6091 return CODE_FOR_ceq_v2df;
6092 break;
6093 case GT:
6094 if (dest_mode == V16QImode && op_mode == V16QImode)
6095 return CODE_FOR_cgt_v16qi;
6096 if (dest_mode == V8HImode && op_mode == V8HImode)
6097 return CODE_FOR_cgt_v8hi;
6098 if (dest_mode == V4SImode && op_mode == V4SImode)
6099 return CODE_FOR_cgt_v4si;
6100 if (dest_mode == V4SImode && op_mode == V4SFmode)
6101 return CODE_FOR_cgt_v4sf;
6102 if (dest_mode == V2DImode && op_mode == V2DFmode)
6103 return CODE_FOR_cgt_v2df;
6104 break;
6105 case GTU:
6106 if (dest_mode == V16QImode && op_mode == V16QImode)
6107 return CODE_FOR_clgt_v16qi;
6108 if (dest_mode == V8HImode && op_mode == V8HImode)
6109 return CODE_FOR_clgt_v8hi;
6110 if (dest_mode == V4SImode && op_mode == V4SImode)
6111 return CODE_FOR_clgt_v4si;
6112 break;
6113 default:
6114 break;
6116 return -1;
6119 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6120 DMODE is expected destination mode. This is a recursive function. */
6122 static rtx
6123 spu_emit_vector_compare (enum rtx_code rcode,
6124 rtx op0, rtx op1,
6125 machine_mode dmode)
6127 int vec_cmp_insn;
6128 rtx mask;
6129 machine_mode dest_mode;
6130 machine_mode op_mode = GET_MODE (op1);
6132 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6134 /* Floating point vector compare instructions use destination V4SImode.
6135 Double floating point vector compare instructions use destination V2DImode.
6136 Move the destination to the appropriate mode later. */
6137 if (dmode == V4SFmode)
6138 dest_mode = V4SImode;
6139 else if (dmode == V2DFmode)
6140 dest_mode = V2DImode;
6141 else
6142 dest_mode = dmode;
6144 mask = gen_reg_rtx (dest_mode);
6145 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6147 if (vec_cmp_insn == -1)
6149 bool swap_operands = false;
6150 bool try_again = false;
6151 switch (rcode)
6153 case LT:
6154 rcode = GT;
6155 swap_operands = true;
6156 try_again = true;
6157 break;
6158 case LTU:
6159 rcode = GTU;
6160 swap_operands = true;
6161 try_again = true;
6162 break;
6163 case NE:
6164 case UNEQ:
6165 case UNLE:
6166 case UNLT:
6167 case UNGE:
6168 case UNGT:
6169 case UNORDERED:
6170 /* Treat A != B as ~(A==B). */
6172 enum rtx_code rev_code;
6173 enum insn_code nor_code;
6174 rtx rev_mask;
6176 rev_code = reverse_condition_maybe_unordered (rcode);
6177 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6179 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6180 gcc_assert (nor_code != CODE_FOR_nothing);
6181 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6182 if (dmode != dest_mode)
6184 rtx temp = gen_reg_rtx (dest_mode);
6185 convert_move (temp, mask, 0);
6186 return temp;
6188 return mask;
6190 break;
6191 case GE:
6192 case GEU:
6193 case LE:
6194 case LEU:
6195 /* Try GT/GTU/LT/LTU OR EQ */
6197 rtx c_rtx, eq_rtx;
6198 enum insn_code ior_code;
6199 enum rtx_code new_code;
6201 switch (rcode)
6203 case GE: new_code = GT; break;
6204 case GEU: new_code = GTU; break;
6205 case LE: new_code = LT; break;
6206 case LEU: new_code = LTU; break;
6207 default:
6208 gcc_unreachable ();
6211 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6212 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6214 ior_code = optab_handler (ior_optab, dest_mode);
6215 gcc_assert (ior_code != CODE_FOR_nothing);
6216 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6217 if (dmode != dest_mode)
6219 rtx temp = gen_reg_rtx (dest_mode);
6220 convert_move (temp, mask, 0);
6221 return temp;
6223 return mask;
6225 break;
6226 case LTGT:
6227 /* Try LT OR GT */
6229 rtx lt_rtx, gt_rtx;
6230 enum insn_code ior_code;
6232 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6233 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6235 ior_code = optab_handler (ior_optab, dest_mode);
6236 gcc_assert (ior_code != CODE_FOR_nothing);
6237 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6238 if (dmode != dest_mode)
6240 rtx temp = gen_reg_rtx (dest_mode);
6241 convert_move (temp, mask, 0);
6242 return temp;
6244 return mask;
6246 break;
6247 case ORDERED:
6248 /* Implement as (A==A) & (B==B) */
6250 rtx a_rtx, b_rtx;
6251 enum insn_code and_code;
6253 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6254 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6256 and_code = optab_handler (and_optab, dest_mode);
6257 gcc_assert (and_code != CODE_FOR_nothing);
6258 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6259 if (dmode != dest_mode)
6261 rtx temp = gen_reg_rtx (dest_mode);
6262 convert_move (temp, mask, 0);
6263 return temp;
6265 return mask;
6267 break;
6268 default:
6269 gcc_unreachable ();
6272 /* You only get two chances. */
6273 if (try_again)
6274 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6276 gcc_assert (vec_cmp_insn != -1);
6278 if (swap_operands)
6280 rtx tmp;
6281 tmp = op0;
6282 op0 = op1;
6283 op1 = tmp;
6287 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6288 if (dmode != dest_mode)
6290 rtx temp = gen_reg_rtx (dest_mode);
6291 convert_move (temp, mask, 0);
6292 return temp;
6294 return mask;
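/* Illustrative sketch (not part of the build; operand names are
   hypothetical): comparisons without a direct instruction are synthesized
   from the ceq/cgt/clgt forms above.  For example, a V4SI "a <= b" has no
   insn, so the GE/GEU/LE/LEU case builds it as

       lt   = spu_emit_vector_compare (LT, a, b, V4SImode);   (swapped cgt)
       eq   = spu_emit_vector_compare (EQ, a, b, V4SImode);   (ceq)
       mask = lt | eq;                                        (ior)

   and a V4SF "a != b" is built as the one's complement of the ceq_v4sf
   result, converted back to the requested mode if necessary.  */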
6298 /* Emit a vector conditional expression.
6299 DEST is the destination. OP1 and OP2 are the two VEC_COND_EXPR operands.
6300 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
6303 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6304 rtx cond, rtx cc_op0, rtx cc_op1)
6306 machine_mode dest_mode = GET_MODE (dest);
6307 enum rtx_code rcode = GET_CODE (cond);
6308 rtx mask;
6310 /* Get the vector mask for the given relational operation. */
6311 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6313 emit_insn (gen_selb (dest, op2, op1, mask));
6315 return 1;
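/* Illustrative sketch (not part of the build; names a, b, x, y are
   hypothetical): for a V4SI conditional such as "dest = (a > b) ? x : y"
   the expansion above is simply

       mask = spu_emit_vector_compare (GT, a, b, V4SImode);
       selb    dest, y, x, mask

   i.e. the compare produces a per-element all-ones/all-zeros mask and
   selb picks bits from the OP1 value where the mask is set and from the
   OP2 value elsewhere.  */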
6318 static rtx
6319 spu_force_reg (machine_mode mode, rtx op)
6321 rtx x, r;
6322 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6324 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6325 || GET_MODE (op) == BLKmode)
6326 return force_reg (mode, convert_to_mode (mode, op, 0));
6327 abort ();
6330 r = force_reg (GET_MODE (op), op);
6331 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6333 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6334 if (x)
6335 return x;
6338 x = gen_reg_rtx (mode);
6339 emit_insn (gen_spu_convert (x, r));
6340 return x;
6343 static void
6344 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6346 HOST_WIDE_INT v = 0;
6347 int lsbits;
6348 /* Check the range of immediate operands. */
6349 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6351 int range = p - SPU_BTI_7;
6353 if (!CONSTANT_P (op))
6354 error ("%s expects an integer literal in the range [%d, %d]",
6355 d->name,
6356 spu_builtin_range[range].low, spu_builtin_range[range].high);
6358 if (GET_CODE (op) == CONST
6359 && (GET_CODE (XEXP (op, 0)) == PLUS
6360 || GET_CODE (XEXP (op, 0)) == MINUS))
6362 v = INTVAL (XEXP (XEXP (op, 0), 1));
6363 op = XEXP (XEXP (op, 0), 0);
6365 else if (GET_CODE (op) == CONST_INT)
6366 v = INTVAL (op);
6367 else if (GET_CODE (op) == CONST_VECTOR
6368 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6369 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6371 /* The default for v is 0, which is valid in every range. */
6372 if (v < spu_builtin_range[range].low
6373 || v > spu_builtin_range[range].high)
6374 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6375 d->name,
6376 spu_builtin_range[range].low, spu_builtin_range[range].high,
6377 v);
6379 switch (p)
6381 case SPU_BTI_S10_4:
6382 lsbits = 4;
6383 break;
6384 case SPU_BTI_U16_2:
6385 /* This is only used in lqa and stqa. Even though the insns
6386 encode 16 bits of the address (all but the 2 least
6387 significant), only 14 bits are used because the address is
6388 masked to be 16-byte aligned. */
6389 lsbits = 4;
6390 break;
6391 case SPU_BTI_S16_2:
6392 /* This is used for lqr and stqr. */
6393 lsbits = 2;
6394 break;
6395 default:
6396 lsbits = 0;
6399 if (GET_CODE (op) == LABEL_REF
6400 || (GET_CODE (op) == SYMBOL_REF
6401 && SYMBOL_REF_FUNCTION_P (op))
6402 || (v & ((1 << lsbits) - 1)) != 0)
6403 warning (0, "%d least significant bits of %s are ignored", lsbits,
6404 d->name);
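/* Illustrative sketch (not part of the build), assuming the usual SPU
   immediate ranges: a builtin whose parameter is SPU_BTI_S10 (an
   "ai"-style signed 10-bit immediate) accepts literals in [-512, 511], so

       si_ai (r, 100);      is accepted
       si_ai (r, 1000);     triggers the "expects an integer literal
                            in the range ..." error above

   while for the SPU_BTI_U16_2 / SPU_BTI_S16_2 operands of lqa/stqa and
   lqr/stqr a literal whose low LSBITS are non-zero only draws the
   "least significant bits ... are ignored" warning.  */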
6409 static int
6410 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6411 rtx target, rtx ops[])
6413 enum insn_code icode = (enum insn_code) d->icode;
6414 int i = 0, a;
6416 /* Expand the arguments into rtl. */
6418 if (d->parm[0] != SPU_BTI_VOID)
6419 ops[i++] = target;
6421 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6423 tree arg = CALL_EXPR_ARG (exp, a);
6424 if (arg == 0)
6425 abort ();
6426 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6429 gcc_assert (i == insn_data[icode].n_generator_args);
6430 return i;
6433 static rtx
6434 spu_expand_builtin_1 (struct spu_builtin_description *d,
6435 tree exp, rtx target)
6437 rtx pat;
6438 rtx ops[8];
6439 enum insn_code icode = (enum insn_code) d->icode;
6440 machine_mode mode, tmode;
6441 int i, p;
6442 int n_operands;
6443 tree return_type;
6445 /* Set up ops[] with values from arglist. */
6446 n_operands = expand_builtin_args (d, exp, target, ops);
6448 /* Handle the target operand which must be operand 0. */
6449 i = 0;
6450 if (d->parm[0] != SPU_BTI_VOID)
6453 /* We prefer the mode specified for the match_operand; otherwise
6454 use the mode from the builtin function prototype. */
6455 tmode = insn_data[d->icode].operand[0].mode;
6456 if (tmode == VOIDmode)
6457 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6459 /* Try to use TARGET, because not using it can lead to extra copies,
6460 and when all of the registers are in use those extra copies lead
6461 to extra spills. */
6462 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6463 ops[0] = target;
6464 else
6465 target = ops[0] = gen_reg_rtx (tmode);
6467 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6468 abort ();
6470 i++;
6473 if (d->fcode == SPU_MASK_FOR_LOAD)
6475 machine_mode mode = insn_data[icode].operand[1].mode;
6476 tree arg;
6477 rtx addr, op, pat;
6479 /* get addr */
6480 arg = CALL_EXPR_ARG (exp, 0);
6481 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6482 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6483 addr = memory_address (mode, op);
6485 /* negate addr */
6486 op = gen_reg_rtx (GET_MODE (addr));
6487 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6488 op = gen_rtx_MEM (mode, op);
6490 pat = GEN_FCN (icode) (target, op);
6491 if (!pat)
6492 return 0;
6493 emit_insn (pat);
6494 return target;
6497 /* Ignore align_hint, but still expand its args in case they have
6498 side effects. */
6499 if (icode == CODE_FOR_spu_align_hint)
6500 return 0;
6502 /* Handle the rest of the operands. */
6503 for (p = 1; i < n_operands; i++, p++)
6505 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6506 mode = insn_data[d->icode].operand[i].mode;
6507 else
6508 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6510 /* mode can be VOIDmode here for labels */
6512 /* For specific intrinsics with an immediate operand, e.g.,
6513 si_ai(), we sometimes need to convert the scalar argument to a
6514 vector argument by splatting the scalar. */
6515 if (VECTOR_MODE_P (mode)
6516 && (GET_CODE (ops[i]) == CONST_INT
6517 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6518 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6520 if (GET_CODE (ops[i]) == CONST_INT)
6521 ops[i] = spu_const (mode, INTVAL (ops[i]));
6522 else
6524 rtx reg = gen_reg_rtx (mode);
6525 machine_mode imode = GET_MODE_INNER (mode);
6526 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6527 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6528 if (imode != GET_MODE (ops[i]))
6529 ops[i] = convert_to_mode (imode, ops[i],
6530 TYPE_UNSIGNED (spu_builtin_types
6531 [d->parm[i]]));
6532 emit_insn (gen_spu_splats (reg, ops[i]));
6533 ops[i] = reg;
6537 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6539 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6540 ops[i] = spu_force_reg (mode, ops[i]);
6543 switch (n_operands)
6545 case 0:
6546 pat = GEN_FCN (icode) (0);
6547 break;
6548 case 1:
6549 pat = GEN_FCN (icode) (ops[0]);
6550 break;
6551 case 2:
6552 pat = GEN_FCN (icode) (ops[0], ops[1]);
6553 break;
6554 case 3:
6555 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6556 break;
6557 case 4:
6558 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6559 break;
6560 case 5:
6561 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6562 break;
6563 case 6:
6564 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6565 break;
6566 default:
6567 abort ();
6570 if (!pat)
6571 abort ();
6573 if (d->type == B_CALL || d->type == B_BISLED)
6574 emit_call_insn (pat);
6575 else if (d->type == B_JUMP)
6577 emit_jump_insn (pat);
6578 emit_barrier ();
6580 else
6581 emit_insn (pat);
6583 return_type = spu_builtin_types[d->parm[0]];
6584 if (d->parm[0] != SPU_BTI_VOID
6585 && GET_MODE (target) != TYPE_MODE (return_type))
6587 /* TARGET is the return value. It should always have the mode of
6588 the builtin function prototype. */
6589 target = spu_force_reg (TYPE_MODE (return_type), target);
6592 return target;
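/* Illustrative sketch (not part of the build): the splat handling above
   is what the si_ai() example in the comment refers to.  Roughly, for

       si_ai (ra, 10);

   the scalar CONST_INT 10 meets a vector-mode match_operand, so it is
   turned into the replicated constant spu_const (mode, 10), i.e.
   { 10, 10, 10, 10 } for V4SImode, before the operand predicate is
   checked.  A non-constant scalar in the same position would instead be
   converted to the inner mode and splatted into a register with
   gen_spu_splats.  */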
6596 spu_expand_builtin (tree exp,
6597 rtx target,
6598 rtx subtarget ATTRIBUTE_UNUSED,
6599 machine_mode mode ATTRIBUTE_UNUSED,
6600 int ignore ATTRIBUTE_UNUSED)
6602 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6603 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6604 struct spu_builtin_description *d;
6606 if (fcode < NUM_SPU_BUILTINS)
6608 d = &spu_builtins[fcode];
6610 return spu_expand_builtin_1 (d, exp, target);
6612 abort ();
6615 /* Implement targetm.vectorize.builtin_mask_for_load. */
6616 static tree
6617 spu_builtin_mask_for_load (void)
6619 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6622 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6623 static int
6624 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6625 tree vectype,
6626 int misalign ATTRIBUTE_UNUSED)
6628 unsigned elements;
6630 switch (type_of_cost)
6632 case scalar_stmt:
6633 case vector_stmt:
6634 case vector_load:
6635 case vector_store:
6636 case vec_to_scalar:
6637 case scalar_to_vec:
6638 case cond_branch_not_taken:
6639 case vec_perm:
6640 case vec_promote_demote:
6641 return 1;
6643 case scalar_store:
6644 return 10;
6646 case scalar_load:
6647 /* Load + rotate. */
6648 return 2;
6650 case unaligned_load:
6651 return 2;
6653 case cond_branch_taken:
6654 return 6;
6656 case vec_construct:
6657 elements = TYPE_VECTOR_SUBPARTS (vectype);
6658 return elements / 2 + 1;
6660 default:
6661 gcc_unreachable ();
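/* Worked example (not part of the build): for a V4SI vec_construct,
   TYPE_VECTOR_SUBPARTS is 4, so the cost above is 4 / 2 + 1 = 3.  The
   scalar_store cost of 10 is deliberately high, presumably reflecting
   the quadword read-modify-write a scalar store needs on SPU, and
   scalar_load is 2 because the loaded quadword still has to be rotated
   into the preferred slot.  */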
6665 /* Implement targetm.vectorize.init_cost. */
6667 static void *
6668 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6670 unsigned *cost = XNEWVEC (unsigned, 3);
6671 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6672 return cost;
6675 /* Implement targetm.vectorize.add_stmt_cost. */
6677 static unsigned
6678 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6679 struct _stmt_vec_info *stmt_info, int misalign,
6680 enum vect_cost_model_location where)
6682 unsigned *cost = (unsigned *) data;
6683 unsigned retval = 0;
6685 if (flag_vect_cost_model)
6687 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6688 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6690 /* Statements in an inner loop relative to the loop being
6691 vectorized are weighted more heavily. The value here is
6692 arbitrary and could potentially be improved with analysis. */
6693 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6694 count *= 50; /* FIXME. */
6696 retval = (unsigned) (count * stmt_cost);
6697 cost[where] += retval;
6700 return retval;
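/* Worked example (not part of the build): with the cost table above,
   adding four vector_load statements to the loop body does

       retval = 4 * 1;               (stmt_cost for vector_load is 1)
       cost[vect_body] += 4;

   and the same statements in an inner loop relative to the loop being
   vectorized would first have COUNT scaled by the (arbitrary) factor
   of 50.  */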
6703 /* Implement targetm.vectorize.finish_cost. */
6705 static void
6706 spu_finish_cost (void *data, unsigned *prologue_cost,
6707 unsigned *body_cost, unsigned *epilogue_cost)
6709 unsigned *cost = (unsigned *) data;
6710 *prologue_cost = cost[vect_prologue];
6711 *body_cost = cost[vect_body];
6712 *epilogue_cost = cost[vect_epilogue];
6715 /* Implement targetm.vectorize.destroy_cost_data. */
6717 static void
6718 spu_destroy_cost_data (void *data)
6720 free (data);
6723 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6724 after applying N iterations. This routine does not determine
6725 how many iterations are required to reach the desired alignment. */
6727 static bool
6728 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6730 if (is_packed)
6731 return false;
6733 /* All other types are naturally aligned. */
6734 return true;
6737 /* Return the appropriate mode for a pointer in a named address space. */
6738 static machine_mode
6739 spu_addr_space_pointer_mode (addr_space_t addrspace)
6741 switch (addrspace)
6743 case ADDR_SPACE_GENERIC:
6744 return ptr_mode;
6745 case ADDR_SPACE_EA:
6746 return EAmode;
6747 default:
6748 gcc_unreachable ();
6752 /* Return the appropriate mode for an address in a named address space. */
6753 static machine_mode
6754 spu_addr_space_address_mode (addr_space_t addrspace)
6756 switch (addrspace)
6758 case ADDR_SPACE_GENERIC:
6759 return Pmode;
6760 case ADDR_SPACE_EA:
6761 return EAmode;
6762 default:
6763 gcc_unreachable ();
6767 /* Determine if one named address space is a subset of another. */
6769 static bool
6770 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6772 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6773 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6775 if (subset == superset)
6776 return true;
6778 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6779 being subsets but instead as disjoint address spaces. */
6780 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6781 return false;
6783 else
6784 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6787 /* Convert from one address space to another. */
6788 static rtx
6789 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6791 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6792 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6794 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6795 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6797 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6799 rtx result, ls;
6801 ls = gen_const_mem (DImode,
6802 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6803 set_mem_align (ls, 128);
6805 result = gen_reg_rtx (Pmode);
6806 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6807 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6808 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6809 ls, const0_rtx, Pmode, 1);
6811 emit_insn (gen_subsi3 (result, op, ls));
6813 return result;
6816 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6818 rtx result, ls;
6820 ls = gen_const_mem (DImode,
6821 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6822 set_mem_align (ls, 128);
6824 result = gen_reg_rtx (EAmode);
6825 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6826 op = force_reg (Pmode, op);
6827 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6828 ls, const0_rtx, EAmode, 1);
6829 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6831 if (EAmode == SImode)
6832 emit_insn (gen_addsi3 (result, op, ls));
6833 else
6834 emit_insn (gen_adddi3 (result, op, ls));
6836 return result;
6839 else
6840 gcc_unreachable ();
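/* Illustrative sketch (not part of the build): the two conversions
   above amount to

       generic = (ea  != 0) ? ea  - __ea_local_store : 0;
       ea      = (gen != 0) ? gen + __ea_local_store : 0;

   i.e. pointers are rebased against the local store's effective
   address, with NULL mapping to NULL in both directions.  */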
6844 /* Count the total number of instructions in each pipe and return the
6845 maximum, which is used as the Minimum Iteration Interval (MII)
6846 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6847 A value of -2 means the instruction can go in either pipe0 or pipe1. */
6848 static int
6849 spu_sms_res_mii (struct ddg *g)
6851 int i;
6852 unsigned t[4] = {0, 0, 0, 0};
6854 for (i = 0; i < g->num_nodes; i++)
6856 rtx_insn *insn = g->nodes[i].insn;
6857 int p = get_pipe (insn) + 2;
6859 gcc_assert (p >= 0);
6860 gcc_assert (p < 4);
6862 t[p]++;
6863 if (dump_file && INSN_P (insn))
6864 fprintf (dump_file, "i%d %s %d %d\n",
6865 INSN_UID (insn),
6866 insn_data[INSN_CODE(insn)].name,
6867 p, t[p]);
6869 if (dump_file)
6870 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6872 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
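/* Worked example (not part of the build): if a loop body has
   t[0] = 2 dual-pipe insns, t[2] = 3 pipe0 insns and t[3] = 4 pipe1
   insns, the result above is

       MAX ((2 + 3 + 4 + 1) / 2, MAX (3, 4)) = MAX (5, 4) = 5

   so the modulo scheduler will not attempt an initiation interval
   below 5 cycles for that loop.  */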
6876 void
6877 spu_init_expanders (void)
6879 if (cfun)
6881 rtx r0, r1;
6882 /* The hard frame pointer register is only 128-bit aligned when
6883 frame_pointer_needed is true. We don't know that until we're
6884 expanding the prologue. */
6885 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6887 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6888 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6889 to be treated as aligned, so generate them here. */
6890 r0 = gen_reg_rtx (SImode);
6891 r1 = gen_reg_rtx (SImode);
6892 mark_reg_pointer (r0, 128);
6893 mark_reg_pointer (r1, 128);
6894 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6895 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6899 static machine_mode
6900 spu_libgcc_cmp_return_mode (void)
6903 /* For SPU, word mode is TImode, so it is better to use SImode
6904 for compare returns. */
6905 return SImode;
6908 static machine_mode
6909 spu_libgcc_shift_count_mode (void)
6911 /* For SPU, word mode is TImode, so it is better to use SImode
6912 for shift counts. */
6913 return SImode;
6916 /* Implement targetm.section_type_flags. */
6917 static unsigned int
6918 spu_section_type_flags (tree decl, const char *name, int reloc)
6920 /* .toe needs to have type @nobits. */
6921 if (strcmp (name, ".toe") == 0)
6922 return SECTION_BSS;
6923 /* Don't load _ea into the current address space. */
6924 if (strcmp (name, "._ea") == 0)
6925 return SECTION_WRITE | SECTION_DEBUG;
6926 return default_section_type_flags (decl, name, reloc);
6929 /* Implement targetm.select_section. */
6930 static section *
6931 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6933 /* Variables and constants defined in the __ea address space
6934 go into a special section named "._ea". */
6935 if (TREE_TYPE (decl) != error_mark_node
6936 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6938 /* We might get called with string constants, but get_named_section
6939 doesn't like them as they are not DECLs. Also, we need to set
6940 flags in that case. */
6941 if (!DECL_P (decl))
6942 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6944 return get_named_section (decl, "._ea", reloc);
6947 return default_elf_select_section (decl, reloc, align);
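/* Illustrative sketch (not part of the build): a definition such as

       __ea int counter;

   has ADDR_SPACE_EA on its type, so the hook above places it in the
   "._ea" section rather than .data, and spu_section_type_flags gives
   that section flags which keep it from being loaded into the current
   address space, per the comment there.  */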
6950 /* Implement targetm.unique_section. */
6951 static void
6952 spu_unique_section (tree decl, int reloc)
6954 /* We don't support unique section names in the __ea address
6955 space for now. */
6956 if (TREE_TYPE (decl) != error_mark_node
6957 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6958 return;
6960 default_unique_section (decl, reloc);
6963 /* Generate a constant or register which contains 2^SCALE. We assume
6964 the result is valid for MODE. Currently, MODE must be V4SFmode and
6965 SCALE must be SImode. */
6967 spu_gen_exp2 (machine_mode mode, rtx scale)
6969 gcc_assert (mode == V4SFmode);
6970 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6971 if (GET_CODE (scale) != CONST_INT)
6973 /* unsigned int exp = (127 + scale) << 23;
6974 __vector float m = (__vector float) spu_splats (exp); */
6975 rtx reg = force_reg (SImode, scale);
6976 rtx exp = gen_reg_rtx (SImode);
6977 rtx mul = gen_reg_rtx (mode);
6978 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6979 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6980 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6981 return mul;
6983 else
6985 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6986 unsigned char arr[16];
6987 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6988 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6989 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6990 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6991 return array_to_constant (mode, arr);
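/* Worked example (not part of the build): for SCALE == 3 the constant
   path above computes exp = 127 + 3 = 130 = 0x82, so each 32-bit slot
   of the V4SF constant gets the bytes

       arr[0] = 0x82 >> 1          = 0x41
       arr[1] = (0x82 << 7) & 0xff = 0x00
       arr[2] = arr[3]             = 0x00

   i.e. the word 0x41000000, which is the single-precision encoding of
   8.0f == 2^3.  */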
6995 /* After reload, just change the convert into a move instruction
6996 or a dead instruction. */
6997 void
6998 spu_split_convert (rtx ops[])
7000 if (REGNO (ops[0]) == REGNO (ops[1]))
7001 emit_note (NOTE_INSN_DELETED);
7002 else
7004 /* Always use TImode, as this might help hard reg copyprop. */
7005 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7006 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7007 emit_insn (gen_move_insn (op0, op1));
7011 void
7012 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7014 fprintf (file, "# profile\n");
7015 fprintf (file, "brsl $75, _mcount\n");
7018 /* Implement targetm.ref_may_alias_errno. */
7019 static bool
7020 spu_ref_may_alias_errno (ao_ref *ref)
7022 tree base = ao_ref_base (ref);
7024 /* With SPU newlib, errno is defined as something like
7025 _impure_data._errno
7026 The default implementation of this target macro does not
7027 recognize such expressions, so we special-case them here. */
7029 if (TREE_CODE (base) == VAR_DECL
7030 && !TREE_STATIC (base)
7031 && DECL_EXTERNAL (base)
7032 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7033 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7034 "_impure_data") == 0
7035 /* _errno is the first member of _impure_data. */
7036 && ref->offset == 0)
7037 return true;
7039 return default_ref_may_alias_errno (ref);
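/* Illustrative sketch (not part of the build), assuming the newlib
   layout described in the comment above, with a declaration along the
   lines of

       extern struct _reent _impure_data;
       #define errno (_impure_data._errno)

   an access to errno is then a reference at offset 0 into the external,
   non-static VAR_DECL _impure_data of record type, which is exactly the
   shape the checks above accept.  */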
7042 /* Output thunk to FILE that implements a C++ virtual function call (with
7043 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7044 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7045 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7046 relative to the resulting this pointer. */
7048 static void
7049 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7050 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7051 tree function)
7053 rtx op[8];
7055 /* Make sure unwind info is emitted for the thunk if needed. */
7056 final_start_function (emit_barrier (), file, 1);
7058 /* Operand 0 is the target function. */
7059 op[0] = XEXP (DECL_RTL (function), 0);
7061 /* Operand 1 is the 'this' pointer. */
7062 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7063 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7064 else
7065 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7067 /* Operands 2/3 are the low/high halfwords of delta. */
7068 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7069 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7071 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7072 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7073 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7075 /* Operands 6/7 are temporary registers. */
7076 op[6] = gen_rtx_REG (Pmode, 79);
7077 op[7] = gen_rtx_REG (Pmode, 78);
7079 /* Add DELTA to the 'this' pointer. */
7080 if (delta)
7082 if (delta >= -0x200 && delta < 0x200)
7083 output_asm_insn ("ai\t%1,%1,%2", op);
7084 else if (delta >= -0x8000 && delta < 0x8000)
7086 output_asm_insn ("il\t%6,%2", op);
7087 output_asm_insn ("a\t%1,%1,%6", op);
7089 else
7091 output_asm_insn ("ilhu\t%6,%3", op);
7092 output_asm_insn ("iohl\t%6,%2", op);
7093 output_asm_insn ("a\t%1,%1,%6", op);
7097 /* Perform vcall adjustment. */
7098 if (vcall_offset)
7100 output_asm_insn ("lqd\t%7,0(%1)", op);
7101 output_asm_insn ("rotqby\t%7,%7,%1", op);
7103 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7104 output_asm_insn ("ai\t%7,%7,%4", op);
7105 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7107 output_asm_insn ("il\t%6,%4", op);
7108 output_asm_insn ("a\t%7,%7,%6", op);
7110 else
7112 output_asm_insn ("ilhu\t%6,%5", op);
7113 output_asm_insn ("iohl\t%6,%4", op);
7114 output_asm_insn ("a\t%7,%7,%6", op);
7117 output_asm_insn ("lqd\t%6,0(%7)", op);
7118 output_asm_insn ("rotqby\t%6,%6,%7", op);
7119 output_asm_insn ("a\t%1,%1,%6", op);
7122 /* Jump to target. */
7123 output_asm_insn ("br\t%0", op);
7125 final_end_function ();
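/* Illustrative sketch (not part of the build), assuming $3 holds the
   incoming 'this' pointer and DELTA fits in a signed 10-bit immediate:
   a thunk with DELTA == 8 and VCALL_OFFSET == 0 comes out as roughly

       ai      $3,$3,8
       br      _ZN1D1fEv

   while larger deltas use the il or ilhu+iohl sequences above, and a
   non-zero VCALL_OFFSET adds the lqd/rotqby vtable load.  The mangled
   target name is made up.  */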
7128 /* Canonicalize a comparison from one we don't have to one we do have. */
7129 static void
7130 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7131 bool op0_preserve_value)
7133 if (!op0_preserve_value
7134 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7136 rtx tem = *op0;
7137 *op0 = *op1;
7138 *op1 = tem;
7139 *code = (int)swap_condition ((enum rtx_code)*code);
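/* Illustrative sketch (not part of the build): the SPU compare patterns
   in this file are of the ceq/cgt/clgt ("equal"/"greater than") form,
   so when the caller allows OP0 to change, a comparison such as

       (lt a b)   becomes   (gt b a)

   by swapping the operands and applying swap_condition to the code.  */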
7143 /* Table of machine attributes. */
7144 static const struct attribute_spec spu_attribute_table[] =
7146 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7147 affects_type_identity } */
7148 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7149 false },
7150 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7151 false },
7152 { NULL, 0, 0, false, false, false, NULL, false }
7155 /* TARGET overrides. */
7157 #undef TARGET_ADDR_SPACE_POINTER_MODE
7158 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7160 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7161 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7163 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7164 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7165 spu_addr_space_legitimate_address_p
7167 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7168 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7170 #undef TARGET_ADDR_SPACE_SUBSET_P
7171 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7173 #undef TARGET_ADDR_SPACE_CONVERT
7174 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7176 #undef TARGET_INIT_BUILTINS
7177 #define TARGET_INIT_BUILTINS spu_init_builtins
7178 #undef TARGET_BUILTIN_DECL
7179 #define TARGET_BUILTIN_DECL spu_builtin_decl
7181 #undef TARGET_EXPAND_BUILTIN
7182 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7184 #undef TARGET_UNWIND_WORD_MODE
7185 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7187 #undef TARGET_LEGITIMIZE_ADDRESS
7188 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7190 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7191 and .quad for the debugger. When it is known that the assembler is fixed,
7192 these can be removed. */
7193 #undef TARGET_ASM_UNALIGNED_SI_OP
7194 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7196 #undef TARGET_ASM_ALIGNED_DI_OP
7197 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7199 /* The .8byte directive doesn't seem to work well for a 32-bit
7200 architecture. */
7201 #undef TARGET_ASM_UNALIGNED_DI_OP
7202 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7204 #undef TARGET_RTX_COSTS
7205 #define TARGET_RTX_COSTS spu_rtx_costs
7207 #undef TARGET_ADDRESS_COST
7208 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7210 #undef TARGET_SCHED_ISSUE_RATE
7211 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7213 #undef TARGET_SCHED_INIT_GLOBAL
7214 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7216 #undef TARGET_SCHED_INIT
7217 #define TARGET_SCHED_INIT spu_sched_init
7219 #undef TARGET_SCHED_VARIABLE_ISSUE
7220 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7222 #undef TARGET_SCHED_REORDER
7223 #define TARGET_SCHED_REORDER spu_sched_reorder
7225 #undef TARGET_SCHED_REORDER2
7226 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7228 #undef TARGET_SCHED_ADJUST_COST
7229 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7231 #undef TARGET_ATTRIBUTE_TABLE
7232 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7234 #undef TARGET_ASM_INTEGER
7235 #define TARGET_ASM_INTEGER spu_assemble_integer
7237 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7238 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7240 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7241 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7243 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7244 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7246 #undef TARGET_ASM_GLOBALIZE_LABEL
7247 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7249 #undef TARGET_PASS_BY_REFERENCE
7250 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7252 #undef TARGET_FUNCTION_ARG
7253 #define TARGET_FUNCTION_ARG spu_function_arg
7255 #undef TARGET_FUNCTION_ARG_ADVANCE
7256 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7258 #undef TARGET_MUST_PASS_IN_STACK
7259 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7261 #undef TARGET_BUILD_BUILTIN_VA_LIST
7262 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7264 #undef TARGET_EXPAND_BUILTIN_VA_START
7265 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7267 #undef TARGET_SETUP_INCOMING_VARARGS
7268 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7270 #undef TARGET_MACHINE_DEPENDENT_REORG
7271 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7273 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7274 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7276 #undef TARGET_INIT_LIBFUNCS
7277 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7279 #undef TARGET_RETURN_IN_MEMORY
7280 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7282 #undef TARGET_ENCODE_SECTION_INFO
7283 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7285 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7286 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7288 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7289 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7291 #undef TARGET_VECTORIZE_INIT_COST
7292 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7294 #undef TARGET_VECTORIZE_ADD_STMT_COST
7295 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7297 #undef TARGET_VECTORIZE_FINISH_COST
7298 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7300 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7301 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7303 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7304 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7306 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7307 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7309 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7310 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7312 #undef TARGET_SCHED_SMS_RES_MII
7313 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7315 #undef TARGET_SECTION_TYPE_FLAGS
7316 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7318 #undef TARGET_ASM_SELECT_SECTION
7319 #define TARGET_ASM_SELECT_SECTION spu_select_section
7321 #undef TARGET_ASM_UNIQUE_SECTION
7322 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7324 #undef TARGET_LEGITIMATE_ADDRESS_P
7325 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7327 #undef TARGET_LEGITIMATE_CONSTANT_P
7328 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7330 #undef TARGET_TRAMPOLINE_INIT
7331 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7333 #undef TARGET_WARN_FUNC_RETURN
7334 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7336 #undef TARGET_OPTION_OVERRIDE
7337 #define TARGET_OPTION_OVERRIDE spu_option_override
7339 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7340 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7342 #undef TARGET_REF_MAY_ALIAS_ERRNO
7343 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7345 #undef TARGET_ASM_OUTPUT_MI_THUNK
7346 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7347 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7348 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7350 /* Variable tracking should be run after all optimizations which
7351 change the order of insns. It also needs a valid CFG. */
7352 #undef TARGET_DELAY_VARTRACK
7353 #define TARGET_DELAY_VARTRACK true
7355 #undef TARGET_CANONICALIZE_COMPARISON
7356 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7358 #undef TARGET_CAN_USE_DOLOOP_P
7359 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7361 struct gcc_target targetm = TARGET_INITIALIZER;
7363 #include "gt-spu.h"