/* Copyright (C) 2006-2014 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "obstack.h"
#include "tree.h"
#include "stringpool.h"
#include "stor-layout.h"
#include "calls.h"
#include "varasm.h"
#include "expr.h"
#include "optabs.h"
#include "except.h"
#include "hashtab.h"
#include "hash-set.h"
#include "vec.h"
#include "machmode.h"
#include "input.h"
#include "function.h"
#include "output.h"
#include "predict.h"
#include "dominance.h"
#include "cfg.h"
#include "cfgrtl.h"
#include "cfganal.h"
#include "lcm.h"
#include "cfgbuild.h"
#include "cfgcleanup.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "ggc.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "langhooks.h"
#include "reload.h"
#include "sched-int.h"
#include "params.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "gimple-fold.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "tm-constrs.h"
#include "sbitmap.h"
#include "df.h"
#include "ddg.h"
#include "timevar.h"
#include "dumpfile.h"
#include "cfgloop.h"
#include "builtins.h"
#include "rtl-iter.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  SPU_BTI_V16QI,
  SPU_BTI_V8HI,
  SPU_BTI_V4SI,
  SPU_BTI_V2DI,
  SPU_BTI_V4SF,
  SPU_BTI_V2DF,
  SPU_BTI_UV16QI,
  SPU_BTI_UV8HI,
  SPU_BTI_UV4SI,
  SPU_BTI_UV2DI,

  /* A 16-byte type. (Implemented with V16QI_type_node) */
  SPU_BTI_QUADWORD,

  /* These all correspond to intSI_type_node */
  SPU_BTI_7,
  SPU_BTI_S7,
  SPU_BTI_U7,
  SPU_BTI_S10,
  SPU_BTI_S10_4,
  SPU_BTI_U14,
  SPU_BTI_16,
  SPU_BTI_S16,
  SPU_BTI_S16_2,
  SPU_BTI_U16,
  SPU_BTI_U16_2,
  SPU_BTI_U18,

  /* These correspond to the standard types */
  SPU_BTI_INTQI,
  SPU_BTI_INTHI,
  SPU_BTI_INTSI,
  SPU_BTI_INTDI,

  SPU_BTI_UINTQI,
  SPU_BTI_UINTHI,
  SPU_BTI_UINTSI,
  SPU_BTI_UINTDI,

  SPU_BTI_FLOAT,
  SPU_BTI_DOUBLE,

  SPU_BTI_VOID,
  SPU_BTI_PTR,

  SPU_BTI_MAX
};

#define V16QI_type_node               (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node                (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node                (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node                (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node                (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node                (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node      (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node       (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node       (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node       (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
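/* Usage sketch (illustrative only; the index arithmetic is an assumption,
   not taken from this file): a builtin expander would range-check an
   immediate argument roughly like

     struct spu_builtin_range r = spu_builtin_range[SPU_BTI_S10 - SPU_BTI_7];
     if (val < r.low || val > r.high)
       error ("constant operand is out of range");
 */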
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs. */
static int get_pipe (rtx_insn *insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);

/* Which instruction set architecture to use. */
int spu_arch;
/* Which CPU we are tuning for. */
int spu_tune;
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
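/* With the default of 2 nops this works out to 32 - 8 = 24 bytes, i.e. the
   hint must come at least 6 real insns before the branch, and the remaining
   distance is covered by at most 2 nops.  */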
enum spu_immediate {
  SPU_NONE,
  SPU_IL,
  SPU_ILA,
  SPU_ILH,
  SPU_ILHU,
  SPU_ORI,
  SPU_ORHI,
  SPU_ORBI,
  SPU_IOHL
};
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						machine_mode mode);

/* Pointer mode for __ea references.  */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
/* Define the structure for the machine field in struct function.  */
struct GTY(()) machine_function
{
  /* Register to use for PIC accesses.  */
  rtx pic_reg;
};

/* How to allocate a 'struct machine_function'.  */
static struct machine_function *
spu_init_machine_status (void)
{
  return ggc_cleared_alloc<machine_function> ();
}
241 /* Implement TARGET_OPTION_OVERRIDE. */
242 static void
243 spu_option_override (void)
245 /* Set up function hooks. */
246 init_machine_status = spu_init_machine_status;
  /* Small loops will be completely peeled at -O3.  For SPU it is more
     important to keep code small by default.  */
250 if (!flag_unroll_loops && !flag_peel_loops)
251 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
252 global_options.x_param_values,
253 global_options_set.x_param_values);
255 flag_omit_frame_pointer = 1;
  /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
258 if (align_functions < 8)
259 align_functions = 8;
261 spu_hint_dist = 8*4 - spu_max_nops*4;
262 if (spu_hint_dist < 0)
263 spu_hint_dist = 0;
265 if (spu_fixed_range_string)
266 fix_range (spu_fixed_range_string);
268 /* Determine processor architectural level. */
269 if (spu_arch_string)
271 if (strcmp (&spu_arch_string[0], "cell") == 0)
272 spu_arch = PROCESSOR_CELL;
273 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
274 spu_arch = PROCESSOR_CELLEDP;
275 else
276 error ("bad value (%s) for -march= switch", spu_arch_string);
279 /* Determine processor to tune for. */
280 if (spu_tune_string)
282 if (strcmp (&spu_tune_string[0], "cell") == 0)
283 spu_tune = PROCESSOR_CELL;
284 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
285 spu_tune = PROCESSOR_CELLEDP;
286 else
287 error ("bad value (%s) for -mtune= switch", spu_tune_string);
290 /* Change defaults according to the processor architecture. */
291 if (spu_arch == PROCESSOR_CELLEDP)
293 /* If no command line option has been otherwise specified, change
294 the default to -mno-safe-hints on celledp -- only the original
295 Cell/B.E. processors require this workaround. */
296 if (!(target_flags_explicit & MASK_SAFE_HINTS))
297 target_flags &= ~MASK_SAFE_HINTS;
300 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
303 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
304 struct attribute_spec.handler. */
306 /* True if MODE is valid for the target. By "valid", we mean able to
307 be manipulated in non-trivial ways. In particular, this means all
308 the arithmetic is supported. */
309 static bool
310 spu_scalar_mode_supported_p (machine_mode mode)
312 switch (mode)
314 case QImode:
315 case HImode:
316 case SImode:
317 case SFmode:
318 case DImode:
319 case TImode:
320 case DFmode:
321 return true;
323 default:
324 return false;
328 /* Similarly for vector modes. "Supported" here is less strict. At
329 least some operations are supported; need to check optabs or builtins
330 for further details. */
331 static bool
332 spu_vector_mode_supported_p (machine_mode mode)
334 switch (mode)
336 case V16QImode:
337 case V8HImode:
338 case V4SImode:
339 case V2DImode:
340 case V4SFmode:
341 case V2DFmode:
342 return true;
344 default:
345 return false;
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  machine_mode om = GET_MODE (op);
  machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
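/* For example, (subreg:SI (reg:HI ...) 0) and (subreg:QI (reg:SI ...) 0) are
   accepted because both modes fit within one 32-bit slot, while a paradoxical
   (subreg:TI (reg:SI ...) 0) is rejected: on SPU the SImode value does not
   live in the least significant bytes of the quadword register, contrary to
   GCC's assumption.  */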
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  machine_mode mode;
  int op_size;
  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }
  /* If it is smaller than SI, adjust to an SImode view; the SUBREG is
     added below.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }
  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
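/* Worked example: for a paradoxical (subreg:TI (reg:SI ...) 0) with
   *start == 100, the SUBREG is stripped and 128 - 32 is subtracted from
   *start, so the caller ends up operating on the SImode register with
   *start == 4.  */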
396 void
397 spu_expand_extv (rtx ops[], int unsignedp)
399 rtx dst = ops[0], src = ops[1];
400 HOST_WIDE_INT width = INTVAL (ops[2]);
401 HOST_WIDE_INT start = INTVAL (ops[3]);
402 HOST_WIDE_INT align_mask;
403 rtx s0, s1, mask, r0;
405 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
407 if (MEM_P (src))
409 /* First, determine if we need 1 TImode load or 2. We need only 1
410 if the bits being extracted do not cross the alignment boundary
411 as determined by the MEM and its address. */
413 align_mask = -MEM_ALIGN (src);
414 if ((start & align_mask) == ((start + width - 1) & align_mask))
416 /* Alignment is sufficient for 1 load. */
417 s0 = gen_reg_rtx (TImode);
418 r0 = spu_expand_load (s0, 0, src, start / 8);
419 start &= 7;
420 if (r0)
421 emit_insn (gen_rotqby_ti (s0, s0, r0));
423 else
425 /* Need 2 loads. */
426 s0 = gen_reg_rtx (TImode);
427 s1 = gen_reg_rtx (TImode);
428 r0 = spu_expand_load (s0, s1, src, start / 8);
429 start &= 7;
431 gcc_assert (start + width <= 128);
432 if (r0)
434 rtx r1 = gen_reg_rtx (SImode);
435 mask = gen_reg_rtx (TImode);
436 emit_move_insn (mask, GEN_INT (-1));
437 emit_insn (gen_rotqby_ti (s0, s0, r0));
438 emit_insn (gen_rotqby_ti (s1, s1, r0));
439 if (GET_CODE (r0) == CONST_INT)
440 r1 = GEN_INT (INTVAL (r0) & 15);
441 else
442 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
443 emit_insn (gen_shlqby_ti (mask, mask, r1));
444 emit_insn (gen_selb (s0, s1, s0, mask));
449 else if (GET_CODE (src) == SUBREG)
451 rtx r = SUBREG_REG (src);
452 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
453 s0 = gen_reg_rtx (TImode);
454 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
455 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
456 else
457 emit_move_insn (s0, src);
459 else
461 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
462 s0 = gen_reg_rtx (TImode);
463 emit_move_insn (s0, src);
466 /* Now s0 is TImode and contains the bits to extract at start. */
468 if (start)
469 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
471 if (128 - width)
472 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
474 emit_move_insn (dst, s0);
477 void
478 spu_expand_insv (rtx ops[])
480 HOST_WIDE_INT width = INTVAL (ops[1]);
481 HOST_WIDE_INT start = INTVAL (ops[2]);
482 HOST_WIDE_INT maskbits;
483 machine_mode dst_mode;
484 rtx dst = ops[0], src = ops[3];
485 int dst_size;
486 rtx mask;
487 rtx shift_reg;
488 int shift;
491 if (GET_CODE (ops[0]) == MEM)
492 dst = gen_reg_rtx (TImode);
493 else
494 dst = adjust_operand (dst, &start);
495 dst_mode = GET_MODE (dst);
496 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
498 if (CONSTANT_P (src))
500 machine_mode m =
501 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
502 src = force_reg (m, convert_to_mode (m, src, 0));
504 src = adjust_operand (src, 0);
506 mask = gen_reg_rtx (dst_mode);
507 shift_reg = gen_reg_rtx (dst_mode);
508 shift = dst_size - start - width;
510 /* It's not safe to use subreg here because the compiler assumes
511 that the SUBREG_REG is right justified in the SUBREG. */
512 convert_move (shift_reg, src, 1);
514 if (shift > 0)
516 switch (dst_mode)
518 case SImode:
519 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
520 break;
521 case DImode:
522 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
523 break;
524 case TImode:
525 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
526 break;
527 default:
528 abort ();
531 else if (shift < 0)
532 abort ();
534 switch (dst_size)
536 case 32:
537 maskbits = (-1ll << (32 - width - start));
538 if (start)
539 maskbits += (1ll << (32 - start));
540 emit_move_insn (mask, GEN_INT (maskbits));
541 break;
542 case 64:
543 maskbits = (-1ll << (64 - width - start));
544 if (start)
545 maskbits += (1ll << (64 - start));
546 emit_move_insn (mask, GEN_INT (maskbits));
547 break;
548 case 128:
550 unsigned char arr[16];
551 int i = start / 8;
552 memset (arr, 0, sizeof (arr));
553 arr[i] = 0xff >> (start & 7);
554 for (i++; i <= (start + width - 1) / 8; i++)
555 arr[i] = 0xff;
556 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
557 emit_move_insn (mask, array_to_constant (TImode, arr));
559 break;
560 default:
561 abort ();
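    /* Example of the mask computed above: for dst_size == 32, start == 8 and
       width == 8, maskbits is (-1ll << 16) + (1ll << 24) == 0x00ff0000, which
       covers exactly the 8 bits starting 8 bits down from the MSB.  */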
563 if (GET_CODE (ops[0]) == MEM)
565 rtx low = gen_reg_rtx (SImode);
566 rtx rotl = gen_reg_rtx (SImode);
567 rtx mask0 = gen_reg_rtx (TImode);
568 rtx addr;
569 rtx addr0;
570 rtx addr1;
571 rtx mem;
573 addr = force_reg (Pmode, XEXP (ops[0], 0));
574 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
575 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
576 emit_insn (gen_negsi2 (rotl, low));
577 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
578 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
579 mem = change_address (ops[0], TImode, addr0);
580 set_mem_alias_set (mem, 0);
581 emit_move_insn (dst, mem);
582 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
583 if (start + width > MEM_ALIGN (ops[0]))
585 rtx shl = gen_reg_rtx (SImode);
586 rtx mask1 = gen_reg_rtx (TImode);
587 rtx dst1 = gen_reg_rtx (TImode);
588 rtx mem1;
589 addr1 = plus_constant (Pmode, addr, 16);
590 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
591 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
592 emit_insn (gen_shlqby_ti (mask1, mask, shl));
593 mem1 = change_address (ops[0], TImode, addr1);
594 set_mem_alias_set (mem1, 0);
595 emit_move_insn (dst1, mem1);
596 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
597 emit_move_insn (mem1, dst1);
599 emit_move_insn (mem, dst);
601 else
602 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
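/* Try to expand a block move of INTVAL (ops[2]) bytes from MEM ops[1] to
   MEM ops[0] with alignment INTVAL (ops[3]).  Return 1 if the move was
   expanded (or there was nothing to do), 0 if the caller must fall back to
   another strategy.  */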
int
spu_expand_block_move (rtx ops[])
609 HOST_WIDE_INT bytes, align, offset;
610 rtx src, dst, sreg, dreg, target;
611 int i;
612 if (GET_CODE (ops[2]) != CONST_INT
613 || GET_CODE (ops[3]) != CONST_INT
614 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
615 return 0;
617 bytes = INTVAL (ops[2]);
618 align = INTVAL (ops[3]);
620 if (bytes <= 0)
621 return 1;
623 dst = ops[0];
624 src = ops[1];
626 if (align == 16)
628 for (offset = 0; offset + 16 <= bytes; offset += 16)
630 dst = adjust_address (ops[0], V16QImode, offset);
631 src = adjust_address (ops[1], V16QImode, offset);
632 emit_move_insn (dst, src);
634 if (offset < bytes)
636 rtx mask;
637 unsigned char arr[16] = { 0 };
638 for (i = 0; i < bytes - offset; i++)
639 arr[i] = 0xff;
640 dst = adjust_address (ops[0], V16QImode, offset);
641 src = adjust_address (ops[1], V16QImode, offset);
642 mask = gen_reg_rtx (V16QImode);
643 sreg = gen_reg_rtx (V16QImode);
644 dreg = gen_reg_rtx (V16QImode);
645 target = gen_reg_rtx (V16QImode);
646 emit_move_insn (mask, array_to_constant (V16QImode, arr));
647 emit_move_insn (dreg, dst);
648 emit_move_insn (sreg, src);
649 emit_insn (gen_selb (target, dreg, sreg, mask));
650 emit_move_insn (dst, target);
652 return 1;
654 return 0;
657 enum spu_comp_code
658 { SPU_EQ, SPU_GT, SPU_GTU };
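/* icodes of the available compare instructions, indexed first by operand
   mode, in the order QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF,
   V2DF (the same index computed in spu_emit_branch_or_set), and then by
   spu_comp_code.  A zero entry means no such comparison exists for that
   mode.  */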
660 int spu_comp_icode[12][3] = {
661 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
662 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
663 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
664 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
665 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
666 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
667 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
668 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
669 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
670 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
671 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
672 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC could figure this out too if we did not
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
680 void
681 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
683 int reverse_compare = 0;
684 int reverse_test = 0;
685 rtx compare_result, eq_result;
686 rtx comp_rtx, eq_rtx;
687 machine_mode comp_mode;
688 machine_mode op_mode;
689 enum spu_comp_code scode, eq_code;
690 enum insn_code ior_code;
691 enum rtx_code code = GET_CODE (cmp);
692 rtx op0 = XEXP (cmp, 0);
693 rtx op1 = XEXP (cmp, 1);
694 int index;
695 int eq_test = 0;
697 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
698 and so on, to keep the constant in operand 1. */
699 if (GET_CODE (op1) == CONST_INT)
701 HOST_WIDE_INT val = INTVAL (op1) - 1;
702 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
703 switch (code)
705 case GE:
706 op1 = GEN_INT (val);
707 code = GT;
708 break;
709 case LT:
710 op1 = GEN_INT (val);
711 code = LE;
712 break;
713 case GEU:
714 op1 = GEN_INT (val);
715 code = GTU;
716 break;
717 case LTU:
718 op1 = GEN_INT (val);
719 code = LEU;
720 break;
721 default:
722 break;
726 /* However, if we generate an integer result, performing a reverse test
727 would require an extra negation, so avoid that where possible. */
728 if (GET_CODE (op1) == CONST_INT && is_set == 1)
730 HOST_WIDE_INT val = INTVAL (op1) + 1;
731 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
732 switch (code)
734 case LE:
735 op1 = GEN_INT (val);
736 code = LT;
737 break;
738 case LEU:
739 op1 = GEN_INT (val);
740 code = LTU;
741 break;
742 default:
743 break;
747 comp_mode = SImode;
748 op_mode = GET_MODE (op0);
750 switch (code)
752 case GE:
753 scode = SPU_GT;
754 if (HONOR_NANS (op_mode))
756 reverse_compare = 0;
757 reverse_test = 0;
758 eq_test = 1;
759 eq_code = SPU_EQ;
761 else
763 reverse_compare = 1;
764 reverse_test = 1;
766 break;
767 case LE:
768 scode = SPU_GT;
769 if (HONOR_NANS (op_mode))
771 reverse_compare = 1;
772 reverse_test = 0;
773 eq_test = 1;
774 eq_code = SPU_EQ;
776 else
778 reverse_compare = 0;
779 reverse_test = 1;
781 break;
782 case LT:
783 reverse_compare = 1;
784 reverse_test = 0;
785 scode = SPU_GT;
786 break;
787 case GEU:
788 reverse_compare = 1;
789 reverse_test = 1;
790 scode = SPU_GTU;
791 break;
792 case LEU:
793 reverse_compare = 0;
794 reverse_test = 1;
795 scode = SPU_GTU;
796 break;
797 case LTU:
798 reverse_compare = 1;
799 reverse_test = 0;
800 scode = SPU_GTU;
801 break;
802 case NE:
803 reverse_compare = 0;
804 reverse_test = 1;
805 scode = SPU_EQ;
806 break;
808 case EQ:
809 scode = SPU_EQ;
810 break;
811 case GT:
812 scode = SPU_GT;
813 break;
814 case GTU:
815 scode = SPU_GTU;
816 break;
817 default:
818 scode = SPU_EQ;
819 break;
822 switch (op_mode)
824 case QImode:
825 index = 0;
826 comp_mode = QImode;
827 break;
828 case HImode:
829 index = 1;
830 comp_mode = HImode;
831 break;
832 case SImode:
833 index = 2;
834 break;
835 case DImode:
836 index = 3;
837 break;
838 case TImode:
839 index = 4;
840 break;
841 case SFmode:
842 index = 5;
843 break;
844 case DFmode:
845 index = 6;
846 break;
847 case V16QImode:
848 index = 7;
849 comp_mode = op_mode;
850 break;
851 case V8HImode:
852 index = 8;
853 comp_mode = op_mode;
854 break;
855 case V4SImode:
856 index = 9;
857 comp_mode = op_mode;
858 break;
859 case V4SFmode:
860 index = 10;
861 comp_mode = V4SImode;
862 break;
863 case V2DFmode:
864 index = 11;
865 comp_mode = V2DImode;
866 break;
867 case V2DImode:
868 default:
869 abort ();
872 if (GET_MODE (op1) == DFmode
873 && (scode != SPU_GT && scode != SPU_EQ))
874 abort ();
876 if (is_set == 0 && op1 == const0_rtx
877 && (GET_MODE (op0) == SImode
878 || GET_MODE (op0) == HImode
879 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
881 /* Don't need to set a register with the result when we are
882 comparing against zero and branching. */
883 reverse_test = !reverse_test;
884 compare_result = op0;
886 else
888 compare_result = gen_reg_rtx (comp_mode);
890 if (reverse_compare)
892 rtx t = op1;
893 op1 = op0;
894 op0 = t;
897 if (spu_comp_icode[index][scode] == 0)
898 abort ();
900 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
901 (op0, op_mode))
902 op0 = force_reg (op_mode, op0);
903 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
904 (op1, op_mode))
905 op1 = force_reg (op_mode, op1);
906 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
907 op0, op1);
908 if (comp_rtx == 0)
909 abort ();
910 emit_insn (comp_rtx);
912 if (eq_test)
914 eq_result = gen_reg_rtx (comp_mode);
915 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
916 op0, op1);
917 if (eq_rtx == 0)
918 abort ();
919 emit_insn (eq_rtx);
920 ior_code = optab_handler (ior_optab, comp_mode);
921 gcc_assert (ior_code != CODE_FOR_nothing);
922 emit_insn (GEN_FCN (ior_code)
923 (compare_result, compare_result, eq_result));
927 if (is_set == 0)
929 rtx bcomp;
930 rtx loc_ref;
932 /* We don't have branch on QI compare insns, so we convert the
933 QI compare result to a HI result. */
934 if (comp_mode == QImode)
936 rtx old_res = compare_result;
937 compare_result = gen_reg_rtx (HImode);
938 comp_mode = HImode;
939 emit_insn (gen_extendqihi2 (compare_result, old_res));
942 if (reverse_test)
943 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
944 else
945 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
947 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
948 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
949 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
950 loc_ref, pc_rtx)));
952 else if (is_set == 2)
954 rtx target = operands[0];
955 int compare_size = GET_MODE_BITSIZE (comp_mode);
956 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
957 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
958 rtx select_mask;
959 rtx op_t = operands[2];
960 rtx op_f = operands[3];
962 /* The result of the comparison can be SI, HI or QI mode. Create a
963 mask based on that result. */
964 if (target_size > compare_size)
966 select_mask = gen_reg_rtx (mode);
967 emit_insn (gen_extend_compare (select_mask, compare_result));
969 else if (target_size < compare_size)
970 select_mask =
971 gen_rtx_SUBREG (mode, compare_result,
972 (compare_size - target_size) / BITS_PER_UNIT);
973 else if (comp_mode != mode)
974 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
975 else
976 select_mask = compare_result;
978 if (GET_MODE (target) != GET_MODE (op_t)
979 || GET_MODE (target) != GET_MODE (op_f))
980 abort ();
982 if (reverse_test)
983 emit_insn (gen_selb (target, op_t, op_f, select_mask));
984 else
985 emit_insn (gen_selb (target, op_f, op_t, select_mask));
987 else
989 rtx target = operands[0];
990 if (reverse_test)
991 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
992 gen_rtx_NOT (comp_mode, compare_result)));
993 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
994 emit_insn (gen_extendhisi2 (target, compare_result));
995 else if (GET_MODE (target) == SImode
996 && GET_MODE (compare_result) == QImode)
997 emit_insn (gen_extend_compare (target, compare_result));
998 else
999 emit_move_insn (target, compare_result);
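/* Return the target-format bit pattern of X, which must be an SFmode or
   DFmode CONST_DOUBLE, as a HOST_WIDE_INT.  */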
1003 HOST_WIDE_INT
1004 const_double_to_hwint (rtx x)
1006 HOST_WIDE_INT val;
1007 REAL_VALUE_TYPE rv;
1008 if (GET_MODE (x) == SFmode)
1010 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1011 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1013 else if (GET_MODE (x) == DFmode)
1015 long l[2];
1016 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1017 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1018 val = l[0];
1019 val = (val << 32) | (l[1] & 0xffffffff);
1021 else
1022 abort ();
1023 return val;
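/* The inverse of const_double_to_hwint: build the SFmode or DFmode
   CONST_DOUBLE whose target-format bit pattern is V.  */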
rtx
hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1029 long tv[2];
1030 REAL_VALUE_TYPE rv;
1031 gcc_assert (mode == SFmode || mode == DFmode);
1033 if (mode == SFmode)
1034 tv[0] = (v << 32) >> 32;
1035 else if (mode == DFmode)
1037 tv[1] = (v << 32) >> 32;
1038 tv[0] = v >> 32;
1040 real_from_target (&rv, tv, mode);
1041 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
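/* Output ADDR to FILE in SPU assembler syntax: "0(reg)" for a plain
   register, "reg,reg" or "offset(reg)" for a PLUS, or the constant itself;
   an outer AND with -16 (quadword masking) is stripped first.  */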
1044 void
1045 print_operand_address (FILE * file, register rtx addr)
1047 rtx reg;
1048 rtx offset;
1050 if (GET_CODE (addr) == AND
1051 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1052 && INTVAL (XEXP (addr, 1)) == -16)
1053 addr = XEXP (addr, 0);
1055 switch (GET_CODE (addr))
1057 case REG:
1058 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1059 break;
1061 case PLUS:
1062 reg = XEXP (addr, 0);
1063 offset = XEXP (addr, 1);
1064 if (GET_CODE (offset) == REG)
1066 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1067 reg_names[REGNO (offset)]);
1069 else if (GET_CODE (offset) == CONST_INT)
1071 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1072 INTVAL (offset), reg_names[REGNO (reg)]);
1074 else
1075 abort ();
1076 break;
1078 case CONST:
1079 case LABEL_REF:
1080 case SYMBOL_REF:
1081 case CONST_INT:
1082 output_addr_const (file, addr);
1083 break;
1085 default:
1086 debug_rtx (addr);
1087 abort ();
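/* Output operand X to FILE.  CODE is the letter following '%' in the output
   template; each case below documents what its letter prints.  A code of 0
   prints the operand itself.  */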
1091 void
1092 print_operand (FILE * file, rtx x, int code)
1094 machine_mode mode = GET_MODE (x);
1095 HOST_WIDE_INT val;
1096 unsigned char arr[16];
1097 int xcode = GET_CODE (x);
1098 int i, info;
1099 if (GET_MODE (x) == VOIDmode)
1100 switch (code)
1102 case 'L': /* 128 bits, signed */
1103 case 'm': /* 128 bits, signed */
1104 case 'T': /* 128 bits, signed */
1105 case 't': /* 128 bits, signed */
1106 mode = TImode;
1107 break;
1108 case 'K': /* 64 bits, signed */
1109 case 'k': /* 64 bits, signed */
1110 case 'D': /* 64 bits, signed */
1111 case 'd': /* 64 bits, signed */
1112 mode = DImode;
1113 break;
1114 case 'J': /* 32 bits, signed */
1115 case 'j': /* 32 bits, signed */
1116 case 's': /* 32 bits, signed */
1117 case 'S': /* 32 bits, signed */
1118 mode = SImode;
1119 break;
1121 switch (code)
1124 case 'j': /* 32 bits, signed */
1125 case 'k': /* 64 bits, signed */
1126 case 'm': /* 128 bits, signed */
1127 if (xcode == CONST_INT
1128 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1130 gcc_assert (logical_immediate_p (x, mode));
1131 constant_to_array (mode, x, arr);
1132 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1133 val = trunc_int_for_mode (val, SImode);
1134 switch (which_logical_immediate (val))
1136 case SPU_ORI:
1137 break;
1138 case SPU_ORHI:
1139 fprintf (file, "h");
1140 break;
1141 case SPU_ORBI:
1142 fprintf (file, "b");
1143 break;
1144 default:
1145 gcc_unreachable();
1148 else
1149 gcc_unreachable();
1150 return;
1152 case 'J': /* 32 bits, signed */
1153 case 'K': /* 64 bits, signed */
1154 case 'L': /* 128 bits, signed */
1155 if (xcode == CONST_INT
1156 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1158 gcc_assert (logical_immediate_p (x, mode)
1159 || iohl_immediate_p (x, mode));
1160 constant_to_array (mode, x, arr);
1161 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1162 val = trunc_int_for_mode (val, SImode);
1163 switch (which_logical_immediate (val))
1165 case SPU_ORI:
1166 case SPU_IOHL:
1167 break;
1168 case SPU_ORHI:
1169 val = trunc_int_for_mode (val, HImode);
1170 break;
1171 case SPU_ORBI:
1172 val = trunc_int_for_mode (val, QImode);
1173 break;
1174 default:
1175 gcc_unreachable();
1177 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1179 else
1180 gcc_unreachable();
1181 return;
1183 case 't': /* 128 bits, signed */
1184 case 'd': /* 64 bits, signed */
1185 case 's': /* 32 bits, signed */
1186 if (CONSTANT_P (x))
1188 enum immediate_class c = classify_immediate (x, mode);
1189 switch (c)
1191 case IC_IL1:
1192 constant_to_array (mode, x, arr);
1193 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1194 val = trunc_int_for_mode (val, SImode);
1195 switch (which_immediate_load (val))
1197 case SPU_IL:
1198 break;
1199 case SPU_ILA:
1200 fprintf (file, "a");
1201 break;
1202 case SPU_ILH:
1203 fprintf (file, "h");
1204 break;
1205 case SPU_ILHU:
1206 fprintf (file, "hu");
1207 break;
1208 default:
1209 gcc_unreachable ();
1211 break;
1212 case IC_CPAT:
1213 constant_to_array (mode, x, arr);
1214 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1215 if (info == 1)
1216 fprintf (file, "b");
1217 else if (info == 2)
1218 fprintf (file, "h");
1219 else if (info == 4)
1220 fprintf (file, "w");
1221 else if (info == 8)
1222 fprintf (file, "d");
1223 break;
1224 case IC_IL1s:
1225 if (xcode == CONST_VECTOR)
1227 x = CONST_VECTOR_ELT (x, 0);
1228 xcode = GET_CODE (x);
1230 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1231 fprintf (file, "a");
1232 else if (xcode == HIGH)
1233 fprintf (file, "hu");
1234 break;
1235 case IC_FSMBI:
1236 case IC_FSMBI2:
1237 case IC_IL2:
1238 case IC_IL2s:
1239 case IC_POOL:
1240 abort ();
1243 else
1244 gcc_unreachable ();
1245 return;
1247 case 'T': /* 128 bits, signed */
1248 case 'D': /* 64 bits, signed */
1249 case 'S': /* 32 bits, signed */
1250 if (CONSTANT_P (x))
1252 enum immediate_class c = classify_immediate (x, mode);
1253 switch (c)
1255 case IC_IL1:
1256 constant_to_array (mode, x, arr);
1257 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1258 val = trunc_int_for_mode (val, SImode);
1259 switch (which_immediate_load (val))
1261 case SPU_IL:
1262 case SPU_ILA:
1263 break;
1264 case SPU_ILH:
1265 case SPU_ILHU:
1266 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1267 break;
1268 default:
1269 gcc_unreachable ();
1271 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1272 break;
1273 case IC_FSMBI:
1274 constant_to_array (mode, x, arr);
1275 val = 0;
1276 for (i = 0; i < 16; i++)
1278 val <<= 1;
1279 val |= arr[i] & 1;
1281 print_operand (file, GEN_INT (val), 0);
1282 break;
1283 case IC_CPAT:
1284 constant_to_array (mode, x, arr);
1285 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1286 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1287 break;
1288 case IC_IL1s:
1289 if (xcode == HIGH)
1290 x = XEXP (x, 0);
1291 if (GET_CODE (x) == CONST_VECTOR)
1292 x = CONST_VECTOR_ELT (x, 0);
1293 output_addr_const (file, x);
1294 if (xcode == HIGH)
1295 fprintf (file, "@h");
1296 break;
1297 case IC_IL2:
1298 case IC_IL2s:
1299 case IC_FSMBI2:
1300 case IC_POOL:
1301 abort ();
1304 else
1305 gcc_unreachable ();
1306 return;
1308 case 'C':
1309 if (xcode == CONST_INT)
	  /* Only the 4 least significant bits are relevant for generating
	     control word instructions.  */
1313 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1314 return;
1316 break;
1318 case 'M': /* print code for c*d */
1319 if (GET_CODE (x) == CONST_INT)
1320 switch (INTVAL (x))
1322 case 1:
1323 fprintf (file, "b");
1324 break;
1325 case 2:
1326 fprintf (file, "h");
1327 break;
1328 case 4:
1329 fprintf (file, "w");
1330 break;
1331 case 8:
1332 fprintf (file, "d");
1333 break;
1334 default:
1335 gcc_unreachable();
1337 else
1338 gcc_unreachable();
1339 return;
1341 case 'N': /* Negate the operand */
1342 if (xcode == CONST_INT)
1343 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1344 else if (xcode == CONST_VECTOR)
1345 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1346 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1347 return;
1349 case 'I': /* enable/disable interrupts */
1350 if (xcode == CONST_INT)
1351 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1352 return;
1354 case 'b': /* branch modifiers */
1355 if (xcode == REG)
1356 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1357 else if (COMPARISON_P (x))
1358 fprintf (file, "%s", xcode == NE ? "n" : "");
1359 return;
1361 case 'i': /* indirect call */
1362 if (xcode == MEM)
1364 if (GET_CODE (XEXP (x, 0)) == REG)
1365 /* Used in indirect function calls. */
1366 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1367 else
1368 output_address (XEXP (x, 0));
1370 return;
1372 case 'p': /* load/store */
1373 if (xcode == MEM)
1375 x = XEXP (x, 0);
1376 xcode = GET_CODE (x);
1378 if (xcode == AND)
1380 x = XEXP (x, 0);
1381 xcode = GET_CODE (x);
1383 if (xcode == REG)
1384 fprintf (file, "d");
1385 else if (xcode == CONST_INT)
1386 fprintf (file, "a");
1387 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1388 fprintf (file, "r");
1389 else if (xcode == PLUS || xcode == LO_SUM)
1391 if (GET_CODE (XEXP (x, 1)) == REG)
1392 fprintf (file, "x");
1393 else
1394 fprintf (file, "d");
1396 return;
1398 case 'e':
1399 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1400 val &= 0x7;
1401 output_addr_const (file, GEN_INT (val));
1402 return;
1404 case 'f':
1405 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1406 val &= 0x1f;
1407 output_addr_const (file, GEN_INT (val));
1408 return;
1410 case 'g':
1411 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1412 val &= 0x3f;
1413 output_addr_const (file, GEN_INT (val));
1414 return;
1416 case 'h':
1417 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1418 val = (val >> 3) & 0x1f;
1419 output_addr_const (file, GEN_INT (val));
1420 return;
1422 case 'E':
1423 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1424 val = -val;
1425 val &= 0x7;
1426 output_addr_const (file, GEN_INT (val));
1427 return;
1429 case 'F':
1430 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1431 val = -val;
1432 val &= 0x1f;
1433 output_addr_const (file, GEN_INT (val));
1434 return;
1436 case 'G':
1437 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1438 val = -val;
1439 val &= 0x3f;
1440 output_addr_const (file, GEN_INT (val));
1441 return;
1443 case 'H':
1444 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1445 val = -(val & -8ll);
1446 val = (val >> 3) & 0x1f;
1447 output_addr_const (file, GEN_INT (val));
1448 return;
1450 case 'v':
1451 case 'w':
1452 constant_to_array (mode, x, arr);
1453 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1454 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1455 return;
1457 case 0:
1458 if (xcode == REG)
1459 fprintf (file, "%s", reg_names[REGNO (x)]);
1460 else if (xcode == MEM)
1461 output_address (XEXP (x, 0));
1462 else if (xcode == CONST_VECTOR)
1463 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1464 else
1465 output_addr_const (file, x);
1466 return;
1468 /* unused letters
1469 o qr u yz
1470 AB OPQR UVWXYZ */
1471 default:
1472 output_operand_lossage ("invalid %%xn code");
1474 gcc_unreachable ();
1477 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1478 caller saved register. For leaf functions it is more efficient to
1479 use a volatile register because we won't need to save and restore the
1480 pic register. This routine is only valid after register allocation
1481 is completed, so we can pick an unused register. */
1482 static rtx
1483 get_pic_reg (void)
1485 if (!reload_completed && !reload_in_progress)
1486 abort ();
1488 /* If we've already made the decision, we need to keep with it. Once we've
1489 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1490 return true since the register is now live; this should not cause us to
1491 "switch back" to using pic_offset_table_rtx. */
1492 if (!cfun->machine->pic_reg)
1494 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1495 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1496 else
1497 cfun->machine->pic_reg = pic_offset_table_rtx;
1500 return cfun->machine->pic_reg;
1503 /* Split constant addresses to handle cases that are too large.
1504 Add in the pic register when in PIC mode.
1505 Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
1509 machine_mode mode = GET_MODE (ops[0]);
1510 enum immediate_class c = classify_immediate (ops[1], mode);
1512 switch (c)
1514 case IC_IL2:
1516 unsigned char arrhi[16];
1517 unsigned char arrlo[16];
1518 rtx to, temp, hi, lo;
1519 int i;
1520 machine_mode imode = mode;
1521 /* We need to do reals as ints because the constant used in the
1522 IOR might not be a legitimate real constant. */
1523 imode = int_mode_for_mode (mode);
1524 constant_to_array (mode, ops[1], arrhi);
1525 if (imode != mode)
1526 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1527 else
1528 to = ops[0];
1529 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1530 for (i = 0; i < 16; i += 4)
1532 arrlo[i + 2] = arrhi[i + 2];
1533 arrlo[i + 3] = arrhi[i + 3];
1534 arrlo[i + 0] = arrlo[i + 1] = 0;
1535 arrhi[i + 2] = arrhi[i + 3] = 0;
1537 hi = array_to_constant (imode, arrhi);
1538 lo = array_to_constant (imode, arrlo);
1539 emit_move_insn (temp, hi);
1540 emit_insn (gen_rtx_SET
1541 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1542 return 1;
1544 case IC_FSMBI2:
1546 unsigned char arr_fsmbi[16];
1547 unsigned char arr_andbi[16];
1548 rtx to, reg_fsmbi, reg_and;
1549 int i;
1550 machine_mode imode = mode;
1551 /* We need to do reals as ints because the constant used in the
1552 * AND might not be a legitimate real constant. */
1553 imode = int_mode_for_mode (mode);
1554 constant_to_array (mode, ops[1], arr_fsmbi);
1555 if (imode != mode)
1556 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1557 else
1558 to = ops[0];
1559 for (i = 0; i < 16; i++)
1560 if (arr_fsmbi[i] != 0)
1562 arr_andbi[0] = arr_fsmbi[i];
1563 arr_fsmbi[i] = 0xff;
1565 for (i = 1; i < 16; i++)
1566 arr_andbi[i] = arr_andbi[0];
1567 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1568 reg_and = array_to_constant (imode, arr_andbi);
1569 emit_move_insn (to, reg_fsmbi);
1570 emit_insn (gen_rtx_SET
1571 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1572 return 1;
1574 case IC_POOL:
1575 if (reload_in_progress || reload_completed)
1577 rtx mem = force_const_mem (mode, ops[1]);
1578 if (TARGET_LARGE_MEM)
1580 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1581 emit_move_insn (addr, XEXP (mem, 0));
1582 mem = replace_equiv_address (mem, addr);
1584 emit_move_insn (ops[0], mem);
1585 return 1;
1587 break;
1588 case IC_IL1s:
1589 case IC_IL2s:
1590 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1592 if (c == IC_IL2s)
1594 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1595 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1597 else if (flag_pic)
1598 emit_insn (gen_pic (ops[0], ops[1]));
1599 if (flag_pic)
1601 rtx pic_reg = get_pic_reg ();
1602 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1604 return flag_pic || c == IC_IL2s;
1606 break;
1607 case IC_IL1:
1608 case IC_FSMBI:
1609 case IC_CPAT:
1610 break;
1612 return 0;
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
1621 static int
1622 need_to_save_reg (int regno, int saving)
1624 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1625 return 1;
1626 if (flag_pic
1627 && regno == PIC_OFFSET_TABLE_REGNUM
1628 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1629 return 1;
1630 return 0;
/* This function is only correct starting with local register
   allocation.  */
static int
spu_saved_regs_size (void)
1638 int reg_save_size = 0;
1639 int regno;
1641 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1642 if (need_to_save_reg (regno, 0))
1643 reg_save_size += 0x10;
1644 return reg_save_size;
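/* Emit a store of register REGNO, viewed as V4SImode, into the frame slot
   at ADDR + OFFSET.  frame_emit_load below is the matching reload.  */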
1647 static rtx_insn *
1648 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1650 rtx reg = gen_rtx_REG (V4SImode, regno);
1651 rtx mem =
1652 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1653 return emit_insn (gen_movv4si (mem, reg));
1656 static rtx_insn *
1657 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1659 rtx reg = gen_rtx_REG (V4SImode, regno);
1660 rtx mem =
1661 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1662 return emit_insn (gen_movv4si (reg, mem));
1665 /* This happens after reload, so we need to expand it. */
1666 static rtx_insn *
1667 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1669 rtx_insn *insn;
1670 if (satisfies_constraint_K (GEN_INT (imm)))
1672 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1674 else
1676 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1677 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1678 if (REGNO (src) == REGNO (scratch))
1679 abort ();
1681 return insn;
1684 /* Return nonzero if this function is known to have a null epilogue. */
int
direct_return (void)
1689 if (reload_completed)
1691 if (cfun->static_chain_decl == 0
1692 && (spu_saved_regs_size ()
1693 + get_frame_size ()
1694 + crtl->outgoing_args_size
1695 + crtl->args.pretend_args_size == 0)
1696 && crtl->is_leaf)
1697 return 1;
1699 return 0;
1703 The stack frame looks like this:
1704 +-------------+
1705 | incoming |
1706 | args |
1707 AP -> +-------------+
1708 | $lr save |
1709 +-------------+
1710 prev SP | back chain |
1711 +-------------+
1712 | var args |
1713 | reg save | crtl->args.pretend_args_size bytes
1714 +-------------+
1715 | ... |
1716 | saved regs | spu_saved_regs_size() bytes
1717 FP -> +-------------+
1718 | ... |
1719 | vars | get_frame_size() bytes
1720 HFP -> +-------------+
1721 | ... |
1722 | outgoing |
1723 | args | crtl->outgoing_args_size bytes
1724 +-------------+
1725 | $lr of next |
1726 | frame |
1727 +-------------+
1728 | back chain |
1729 SP -> +-------------+
1732 void
1733 spu_expand_prologue (void)
1735 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1736 HOST_WIDE_INT total_size;
1737 HOST_WIDE_INT saved_regs_size;
1738 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1739 rtx scratch_reg_0, scratch_reg_1;
1740 rtx_insn *insn;
1741 rtx real;
1743 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1744 cfun->machine->pic_reg = pic_offset_table_rtx;
1746 if (spu_naked_function_p (current_function_decl))
1747 return;
1749 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1750 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1752 saved_regs_size = spu_saved_regs_size ();
1753 total_size = size + saved_regs_size
1754 + crtl->outgoing_args_size
1755 + crtl->args.pretend_args_size;
1757 if (!crtl->is_leaf
1758 || cfun->calls_alloca || total_size > 0)
1759 total_size += STACK_POINTER_OFFSET;
1761 /* Save this first because code after this might use the link
1762 register as a scratch register. */
1763 if (!crtl->is_leaf)
1765 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1766 RTX_FRAME_RELATED_P (insn) = 1;
1769 if (total_size > 0)
1771 offset = -crtl->args.pretend_args_size;
1772 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1773 if (need_to_save_reg (regno, 1))
1775 offset -= 16;
1776 insn = frame_emit_store (regno, sp_reg, offset);
1777 RTX_FRAME_RELATED_P (insn) = 1;
1781 if (flag_pic && cfun->machine->pic_reg)
1783 rtx pic_reg = cfun->machine->pic_reg;
1784 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1785 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1788 if (total_size > 0)
1790 if (flag_stack_check)
1792 /* We compare against total_size-1 because
1793 ($sp >= total_size) <=> ($sp > total_size-1) */
1794 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1795 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1796 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1797 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1799 emit_move_insn (scratch_v4si, size_v4si);
1800 size_v4si = scratch_v4si;
1802 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1803 emit_insn (gen_vec_extractv4si
1804 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1805 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1808 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1809 the value of the previous $sp because we save it as the back
1810 chain. */
1811 if (total_size <= 2000)
1813 /* In this case we save the back chain first. */
1814 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1815 insn =
1816 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1818 else
1820 insn = emit_move_insn (scratch_reg_0, sp_reg);
1821 insn =
1822 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1824 RTX_FRAME_RELATED_P (insn) = 1;
1825 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1826 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1828 if (total_size > 2000)
1830 /* Save the back chain ptr */
1831 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1834 if (frame_pointer_needed)
1836 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1837 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1838 + crtl->outgoing_args_size;
1839 /* Set the new frame_pointer */
1840 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1841 RTX_FRAME_RELATED_P (insn) = 1;
1842 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1843 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1844 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1848 if (flag_stack_usage_info)
1849 current_function_static_stack_size = total_size;
1852 void
1853 spu_expand_epilogue (bool sibcall_p)
1855 int size = get_frame_size (), offset, regno;
1856 HOST_WIDE_INT saved_regs_size, total_size;
1857 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1858 rtx scratch_reg_0;
1860 if (spu_naked_function_p (current_function_decl))
1861 return;
1863 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1865 saved_regs_size = spu_saved_regs_size ();
1866 total_size = size + saved_regs_size
1867 + crtl->outgoing_args_size
1868 + crtl->args.pretend_args_size;
1870 if (!crtl->is_leaf
1871 || cfun->calls_alloca || total_size > 0)
1872 total_size += STACK_POINTER_OFFSET;
1874 if (total_size > 0)
1876 if (cfun->calls_alloca)
1877 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1878 else
1879 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1882 if (saved_regs_size > 0)
1884 offset = -crtl->args.pretend_args_size;
1885 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1886 if (need_to_save_reg (regno, 1))
1888 offset -= 0x10;
1889 frame_emit_load (regno, sp_reg, offset);
1894 if (!crtl->is_leaf)
1895 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1897 if (!sibcall_p)
1899 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1900 emit_jump_insn (gen__return ());
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1907 if (count != 0)
1908 return 0;
1909 /* This is inefficient because it ends up copying to a save-register
1910 which then gets saved even though $lr has already been saved. But
1911 it does generate better code for leaf functions and we don't need
1912 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1913 used for __builtin_return_address anyway, so maybe we don't care if
1914 it's inefficient. */
1915 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1919 /* Given VAL, generate a constant appropriate for MODE.
1920 If MODE is a vector mode, every element will be VAL.
1921 For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (machine_mode mode, HOST_WIDE_INT val)
1925 rtx inner;
1926 rtvec v;
1927 int units, i;
1929 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1930 || GET_MODE_CLASS (mode) == MODE_FLOAT
1931 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1932 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1934 if (GET_MODE_CLASS (mode) == MODE_INT)
1935 return immed_double_const (val, 0, mode);
1937 /* val is the bit representation of the float */
1938 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1939 return hwint_to_const_double (mode, val);
1941 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1942 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1943 else
1944 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1946 units = GET_MODE_NUNITS (mode);
1948 v = rtvec_alloc (units);
1950 for (i = 0; i < units; ++i)
1951 RTVEC_ELT (v, i) = inner;
1953 return gen_rtx_CONST_VECTOR (mode, v);
1956 /* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1960 unsigned char arr[16];
1961 arr[0] = (a >> 24) & 0xff;
1962 arr[1] = (a >> 16) & 0xff;
1963 arr[2] = (a >> 8) & 0xff;
1964 arr[3] = (a >> 0) & 0xff;
1965 arr[4] = (b >> 24) & 0xff;
1966 arr[5] = (b >> 16) & 0xff;
1967 arr[6] = (b >> 8) & 0xff;
1968 arr[7] = (b >> 0) & 0xff;
1969 arr[8] = (c >> 24) & 0xff;
1970 arr[9] = (c >> 16) & 0xff;
1971 arr[10] = (c >> 8) & 0xff;
1972 arr[11] = (c >> 0) & 0xff;
1973 arr[12] = (d >> 24) & 0xff;
1974 arr[13] = (d >> 16) & 0xff;
1975 arr[14] = (d >> 8) & 0xff;
1976 arr[15] = (d >> 0) & 0xff;
1977 return array_to_constant(mode, arr);
1980 /* branch hint stuff */
1982 /* An array of these is used to propagate hints to predecessor blocks. */
1983 struct spu_bb_info
1985 rtx_insn *prop_jump; /* propagated from another block */
1986 int bb_index; /* the original block. */
1988 static struct spu_bb_info *spu_bb_info;
1990 #define STOP_HINT_P(INSN) \
1991 (CALL_P(INSN) \
1992 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1993 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1995 /* 1 when RTX is a hinted branch or its target. We keep track of
1996 what has been hinted so the safe-hint code can test it easily. */
1997 #define HINTED_P(RTX) \
1998 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2000 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2001 #define SCHED_ON_EVEN_P(RTX) \
2002 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2004 /* Emit a nop for INSN such that the two will dual issue. This assumes
2005 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2006 We check for TImode to handle a MULTI1 insn which has dual issued its
2007 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
2008 static void
2009 emit_nop_for_insn (rtx_insn *insn)
2011 int p;
2012 rtx_insn *new_insn;
2014 /* We need to handle JUMP_TABLE_DATA separately. */
2015 if (JUMP_TABLE_DATA_P (insn))
2017 new_insn = emit_insn_after (gen_lnop(), insn);
2018 recog_memoized (new_insn);
2019 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2020 return;
2023 p = get_pipe (insn);
2024 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2025 new_insn = emit_insn_after (gen_lnop (), insn);
2026 else if (p == 1 && GET_MODE (insn) == TImode)
2028 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2029 PUT_MODE (new_insn, TImode);
2030 PUT_MODE (insn, VOIDmode);
2032 else
2033 new_insn = emit_insn_after (gen_lnop (), insn);
2034 recog_memoized (new_insn);
2035 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2038 /* Insert nops in basic blocks to meet dual issue alignment
2039 requirements. Also make sure hbrp and hint instructions are at least
2040 one cycle apart, possibly inserting a nop. */
2041 static void
2042 pad_bb(void)
2044 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2045 int length;
2046 int addr;
2048 /* This sets up INSN_ADDRESSES. */
2049 shorten_branches (get_insns ());
2051 /* Keep track of length added by nops. */
2052 length = 0;
2054 prev_insn = 0;
2055 insn = get_insns ();
2056 if (!active_insn_p (insn))
2057 insn = next_active_insn (insn);
2058 for (; insn; insn = next_insn)
2060 next_insn = next_active_insn (insn);
2061 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2062 || INSN_CODE (insn) == CODE_FOR_hbr)
2064 if (hbr_insn)
2066 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2067 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2068 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2069 || (a1 - a0 == 4))
2071 prev_insn = emit_insn_before (gen_lnop (), insn);
2072 PUT_MODE (prev_insn, GET_MODE (insn));
2073 PUT_MODE (insn, TImode);
2074 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2075 length += 4;
2078 hbr_insn = insn;
2080 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2082 if (GET_MODE (insn) == TImode)
2083 PUT_MODE (next_insn, TImode);
2084 insn = next_insn;
2085 next_insn = next_active_insn (insn);
2087 addr = INSN_ADDRESSES (INSN_UID (insn));
2088 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2090 if (((addr + length) & 7) != 0)
2092 emit_nop_for_insn (prev_insn);
2093 length += 4;
2096 else if (GET_MODE (insn) == TImode
2097 && ((next_insn && GET_MODE (next_insn) != TImode)
2098 || get_attr_type (insn) == TYPE_MULTI0)
2099 && ((addr + length) & 7) != 0)
2101 /* prev_insn will always be set because the first insn is
2102 always 8-byte aligned. */
2103 emit_nop_for_insn (prev_insn);
2104 length += 4;
2106 prev_insn = insn;
2111 /* Routines for branch hints. */
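/* Emit a branch-hint (hbr) instruction before BEFORE, hinting that BRANCH
   will transfer control to TARGET.  DISTANCE is the distance in bytes from
   the hint to the branch; BLOCKS records which basic blocks have received
   hints.  */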
2113 static void
2114 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2115 int distance, sbitmap blocks)
2117 rtx branch_label = 0;
2118 rtx_insn *hint;
2119 rtx_insn *insn;
2120 rtx_jump_table_data *table;
2122 if (before == 0 || branch == 0 || target == 0)
2123 return;
  /* While scheduling we require hints to be no further than 600 bytes,
     so we need to enforce that here too.  */
2127 if (distance > 600)
2128 return;
  /* If BEFORE is a basic block note, emit the hint after the note rather
     than before it.  */
2131 if (NOTE_INSN_BASIC_BLOCK_P (before))
2132 before = NEXT_INSN (before);
2134 branch_label = gen_label_rtx ();
2135 LABEL_NUSES (branch_label)++;
2136 LABEL_PRESERVE_P (branch_label) = 1;
2137 insn = emit_label_before (branch_label, branch);
2138 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2139 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2141 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2142 recog_memoized (hint);
2143 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2144 HINTED_P (branch) = 1;
2146 if (GET_CODE (target) == LABEL_REF)
2147 HINTED_P (XEXP (target, 0)) = 1;
2148 else if (tablejump_p (branch, 0, &table))
2150 rtvec vec;
2151 int j;
2152 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2153 vec = XVEC (PATTERN (table), 0);
2154 else
2155 vec = XVEC (PATTERN (table), 1);
2156 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2157 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2160 if (distance >= 588)
2162 /* Make sure the hint isn't scheduled any earlier than this point,
2163 which could make it too far for the branch offset to fit. */
2164 insn = emit_insn_before (gen_blockage (), hint);
2165 recog_memoized (insn);
2166 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2168 else if (distance <= 8 * 4)
2170 /* To guarantee at least 8 insns between the hint and branch we
2171 insert nops. */
2172 int d;
2173 for (d = distance; d < 8 * 4; d += 4)
2175 insn =
2176 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2177 recog_memoized (insn);
2178 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2181 /* Make sure any nops inserted aren't scheduled before the hint. */
2182 insn = emit_insn_after (gen_blockage (), hint);
2183 recog_memoized (insn);
2184 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2186 /* Make sure any nops inserted aren't scheduled after the call. */
2187 if (CALL_P (branch) && distance < 8 * 4)
2189 insn = emit_insn_before (gen_blockage (), branch);
2190 recog_memoized (insn);
2191 INSN_LOCATION (insn) = INSN_LOCATION (branch);
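/* The distance handling above splits into three windows: beyond 600
   bytes no hint is emitted at all, from 588 bytes up a blockage pins
   the hint so scheduling cannot push it out of range, and at 32 bytes
   or less nops are added until hint and branch are 8 insns apart.  A
   compiled-out sketch of the last case, with a hypothetical distance:  */
#if 0
static int
hint_nops_needed_example (int distance)
{
  /* Mirrors the nop loop in spu_emit_branch_hint.  For distance == 20
     it runs for d = 20, 24, 28 and so reports three nopn insns.  */
  int d, n = 0;
  for (d = distance; d < 8 * 4; d += 4)
    n++;
  return n;
}
#endif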
2196 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2197 the rtx for the branch target. */
2198 static rtx
2199 get_branch_target (rtx_insn *branch)
2201 if (JUMP_P (branch))
2203 rtx set, src;
2205 /* Return statements */
2206 if (GET_CODE (PATTERN (branch)) == RETURN)
2207 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2209 /* ASM GOTOs. */
2210 if (extract_asm_operands (PATTERN (branch)) != NULL)
2211 return NULL;
2213 set = single_set (branch);
2214 src = SET_SRC (set);
2215 if (GET_CODE (SET_DEST (set)) != PC)
2216 abort ();
2218 if (GET_CODE (src) == IF_THEN_ELSE)
2220 rtx lab = 0;
2221 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2222 if (note)
2224 /* If the more probable case is not a fall through, then
2225 try a branch hint. */
2226 int prob = XINT (note, 0);
2227 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2228 && GET_CODE (XEXP (src, 1)) != PC)
2229 lab = XEXP (src, 1);
2230 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2231 && GET_CODE (XEXP (src, 2)) != PC)
2232 lab = XEXP (src, 2);
2234 if (lab)
2236 if (GET_CODE (lab) == RETURN)
2237 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2238 return lab;
2240 return 0;
2243 return src;
2245 else if (CALL_P (branch))
2247 rtx call;
2248 /* All of our call patterns are in a PARALLEL and the CALL is
2249 the first pattern in the PARALLEL. */
2250 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2251 abort ();
2252 call = XVECEXP (PATTERN (branch), 0, 0);
2253 if (GET_CODE (call) == SET)
2254 call = SET_SRC (call);
2255 if (GET_CODE (call) != CALL)
2256 abort ();
2257 return XEXP (XEXP (call, 0), 0);
2259 return 0;
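/* For the conditional case above: a jump is only worth hinting when
   its direction is reasonably predictable.  With the usual
   REG_BR_PROB_BASE of 10000, a probability note above 6000 hints the
   non-fallthrough "then" arm, one below 4000 hints the "else" arm,
   and anything in between returns 0 so the branch is left unhinted.  */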
2262 /* The special $hbr register is used to prevent the insn scheduler from
2263 moving hbr insns across instructions which invalidate them. It
2264 should only be used in a clobber, and this function searches for
2265 insns which clobber it. */
2266 static bool
2267 insn_clobbers_hbr (rtx_insn *insn)
2269 if (INSN_P (insn)
2270 && GET_CODE (PATTERN (insn)) == PARALLEL)
2272 rtx parallel = PATTERN (insn);
2273 rtx clobber;
2274 int j;
2275 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2277 clobber = XVECEXP (parallel, 0, j);
2278 if (GET_CODE (clobber) == CLOBBER
2279 && GET_CODE (XEXP (clobber, 0)) == REG
2280 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2281 return 1;
2284 return 0;
2287 /* Search up to 32 insns starting at FIRST:
2288 - at any kind of hinted branch, just return
2289 - at any unconditional branch in the first 15 insns, just return
2290 - at a call or indirect branch, after the first 15 insns, force it to
2291 an even address and return
2292 - at any unconditional branch, after the first 15 insns, force it to
2293 an even address.
2294 At the end of the search, insert an hbrp within 4 insns of FIRST,
2295 and an hbrp within 16 instructions of FIRST.
2297 static void
2298 insert_hbrp_for_ilb_runout (rtx_insn *first)
2300 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2301 int addr = 0, length, first_addr = -1;
2302 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2303 int insert_lnop_after = 0;
2304 for (insn = first; insn; insn = NEXT_INSN (insn))
2305 if (INSN_P (insn))
2307 if (first_addr == -1)
2308 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2309 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2310 length = get_attr_length (insn);
2312 if (before_4 == 0 && addr + length >= 4 * 4)
2313 before_4 = insn;
2314 /* We test for 14 instructions because the first hbrp will add
2315 up to 2 instructions. */
2316 if (before_16 == 0 && addr + length >= 14 * 4)
2317 before_16 = insn;
2319 if (INSN_CODE (insn) == CODE_FOR_hbr)
2321 /* Make sure an hbrp is at least 2 cycles away from a hint.
2322 Insert an lnop after the hbrp when necessary. */
2323 if (before_4 == 0 && addr > 0)
2325 before_4 = insn;
2326 insert_lnop_after |= 1;
2328 else if (before_4 && addr <= 4 * 4)
2329 insert_lnop_after |= 1;
2330 if (before_16 == 0 && addr > 10 * 4)
2332 before_16 = insn;
2333 insert_lnop_after |= 2;
2335 else if (before_16 && addr <= 14 * 4)
2336 insert_lnop_after |= 2;
2339 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2341 if (addr < hbrp_addr0)
2342 hbrp_addr0 = addr;
2343 else if (addr < hbrp_addr1)
2344 hbrp_addr1 = addr;
2347 if (CALL_P (insn) || JUMP_P (insn))
2349 if (HINTED_P (insn))
2350 return;
2352 /* Any branch after the first 15 insns should be on an even
2353 address to avoid a special case branch. There might be
2354 some nops and/or hbrps inserted, so we test after 10
2355 insns. */
2356 if (addr > 10 * 4)
2357 SCHED_ON_EVEN_P (insn) = 1;
2360 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2361 return;
2364 if (addr + length >= 32 * 4)
2366 gcc_assert (before_4 && before_16);
2367 if (hbrp_addr0 > 4 * 4)
2369 insn =
2370 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2371 recog_memoized (insn);
2372 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2373 INSN_ADDRESSES_NEW (insn,
2374 INSN_ADDRESSES (INSN_UID (before_4)));
2375 PUT_MODE (insn, GET_MODE (before_4));
2376 PUT_MODE (before_4, TImode);
2377 if (insert_lnop_after & 1)
2379 insn = emit_insn_before (gen_lnop (), before_4);
2380 recog_memoized (insn);
2381 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2382 INSN_ADDRESSES_NEW (insn,
2383 INSN_ADDRESSES (INSN_UID (before_4)));
2384 PUT_MODE (insn, TImode);
2387 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2388 && hbrp_addr1 > 16 * 4)
2390 insn =
2391 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2392 recog_memoized (insn);
2393 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2394 INSN_ADDRESSES_NEW (insn,
2395 INSN_ADDRESSES (INSN_UID (before_16)));
2396 PUT_MODE (insn, GET_MODE (before_16));
2397 PUT_MODE (before_16, TImode);
2398 if (insert_lnop_after & 2)
2400 insn = emit_insn_before (gen_lnop (), before_16);
2401 recog_memoized (insn);
2402 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2403 INSN_ADDRESSES_NEW (insn,
2404 INSN_ADDRESSES (INSN_UID
2405 (before_16)));
2406 PUT_MODE (insn, TImode);
2409 return;
2412 else if (BARRIER_P (insn))
2413 return;
2417 /* The SPU might hang when it executes 48 inline instructions after a
2418 hinted branch jumps to its hinted target. The beginning of a
2419 function and the return from a call might have been hinted, and
2420 must be handled as well. To prevent a hang we insert 2 hbrps. The
2421 first should be within 6 insns of the branch target. The second
2422 should be within 22 insns of the branch target. When determining
2423 if hbrps are necessary, we look for only 32 inline instructions,
2424 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2425 when inserting new hbrps, we insert them within 4 and 16 insns of
2426 the target. */
2427 static void
2428 insert_hbrp (void)
2430 rtx_insn *insn;
2431 if (TARGET_SAFE_HINTS)
2433 shorten_branches (get_insns ());
2434 /* Insert hbrp at beginning of function */
2435 insn = next_active_insn (get_insns ());
2436 if (insn)
2437 insert_hbrp_for_ilb_runout (insn);
2438 /* Insert hbrp after hinted targets. */
2439 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2440 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2441 insert_hbrp_for_ilb_runout (next_active_insn (insn));
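/* The insn budgets above fit together as follows: the hardware window
   is 48 insns past a hinted target, and insert_hbrp_for_ilb_runout
   only scans 32 inline insns because up to 12 nops and 4 hbrps may
   later be inserted into that stretch (32 + 12 + 4 = 48).  Likewise,
   the hbrps are placed within 4 and 16 insns of the target (the
   4 * 4 and 14 * 4 byte tests above) so that even after padding they
   still land inside the 6- and 22-insn windows the hardware needs.  */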
2445 static int in_spu_reorg;
2447 static void
2448 spu_var_tracking (void)
2450 if (flag_var_tracking)
2452 df_analyze ();
2453 timevar_push (TV_VAR_TRACKING);
2454 variable_tracking_main ();
2455 timevar_pop (TV_VAR_TRACKING);
2456 df_finish_pass (false);
2460 /* Insert branch hints. There are no branch optimizations after this
2461 pass, so it's safe to set our branch hints now. */
2462 static void
2463 spu_machine_dependent_reorg (void)
2465 sbitmap blocks;
2466 basic_block bb;
2467 rtx_insn *branch, *insn;
2468 rtx branch_target = 0;
2469 int branch_addr = 0, insn_addr, required_dist = 0;
2470 int i;
2471 unsigned int j;
2473 if (!TARGET_BRANCH_HINTS || optimize == 0)
2475 /* We still do it for unoptimized code because an external
2476 function might have hinted a call or return. */
2477 compute_bb_for_insn ();
2478 insert_hbrp ();
2479 pad_bb ();
2480 spu_var_tracking ();
2481 free_bb_for_insn ();
2482 return;
2485 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2486 bitmap_clear (blocks);
2488 in_spu_reorg = 1;
2489 compute_bb_for_insn ();
2491 /* (Re-)discover loops so that bb->loop_father can be used
2492 in the analysis below. */
2493 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2495 compact_blocks ();
2497 spu_bb_info =
2498 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2499 sizeof (struct spu_bb_info));
2501 /* We need exact insn addresses and lengths. */
2502 shorten_branches (get_insns ());
2504 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2506 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2507 branch = 0;
2508 if (spu_bb_info[i].prop_jump)
2510 branch = spu_bb_info[i].prop_jump;
2511 branch_target = get_branch_target (branch);
2512 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2513 required_dist = spu_hint_dist;
2515 /* Search from end of a block to beginning. In this loop, find
2516 jumps which need a hint and emit the hint only when:
2517 - it's an indirect branch and we're at the insn which sets
2518 the register
2519 - we're at an insn that will invalidate the hint. e.g., a
2520 call, another hint insn, inline asm that clobbers $hbr, and
2521 some inlined operations (divmodsi4). Don't consider jumps
2522 because they are only at the end of a block and are
2523 considered when we are deciding whether to propagate
2524 - we're getting too far away from the branch. The hbr insns
2525 only have a signed 10-bit offset.
2526 We go back as far as possible so the branch will be considered
2527 for propagation when we get to the beginning of the block. */
2528 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2530 if (INSN_P (insn))
2532 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2533 if (branch
2534 && ((GET_CODE (branch_target) == REG
2535 && set_of (branch_target, insn) != NULL_RTX)
2536 || insn_clobbers_hbr (insn)
2537 || branch_addr - insn_addr > 600))
2539 rtx_insn *next = NEXT_INSN (insn);
2540 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2541 if (insn != BB_END (bb)
2542 && branch_addr - next_addr >= required_dist)
2544 if (dump_file)
2545 fprintf (dump_file,
2546 "hint for %i in block %i before %i\n",
2547 INSN_UID (branch), bb->index,
2548 INSN_UID (next));
2549 spu_emit_branch_hint (next, branch, branch_target,
2550 branch_addr - next_addr, blocks);
2552 branch = 0;
2555 /* JUMP_P will only be true at the end of a block. When
2556 branch is already set it means we've previously decided
2557 to propagate a hint for that branch into this block. */
2558 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2560 branch = 0;
2561 if ((branch_target = get_branch_target (insn)))
2563 branch = insn;
2564 branch_addr = insn_addr;
2565 required_dist = spu_hint_dist;
2569 if (insn == BB_HEAD (bb))
2570 break;
2573 if (branch)
2575 /* If we haven't emitted a hint for this branch yet, it might
2576 be profitable to emit it in one of the predecessor blocks,
2577 especially for loops. */
2578 rtx_insn *bbend;
2579 basic_block prev = 0, prop = 0, prev2 = 0;
2580 int loop_exit = 0, simple_loop = 0;
2581 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2583 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2584 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2585 prev = EDGE_PRED (bb, j)->src;
2586 else
2587 prev2 = EDGE_PRED (bb, j)->src;
2589 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2590 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2591 loop_exit = 1;
2592 else if (EDGE_SUCC (bb, j)->dest == bb)
2593 simple_loop = 1;
2595 /* If this branch is a loop exit then propagate to previous
2596 fallthru block. This catches the cases when it is a simple
2597 loop or when there is an initial branch into the loop. */
2598 if (prev && (loop_exit || simple_loop)
2599 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2600 prop = prev;
2602 /* If there is only one adjacent predecessor, don't propagate
2603 outside this loop. */
2604 else if (prev && single_pred_p (bb)
2605 && prev->loop_father == bb->loop_father)
2606 prop = prev;
2608 /* If this is the JOIN block of a simple IF-THEN then
2609 propagate the hint to the HEADER block. */
2610 else if (prev && prev2
2611 && EDGE_COUNT (bb->preds) == 2
2612 && EDGE_COUNT (prev->preds) == 1
2613 && EDGE_PRED (prev, 0)->src == prev2
2614 && prev2->loop_father == bb->loop_father
2615 && GET_CODE (branch_target) != REG)
2616 prop = prev;
2618 /* Don't propagate when:
2619 - this is a simple loop and the hint would be too far
2620 - this is not a simple loop and there are 16 insns in
2621 this block already
2622 - the predecessor block ends in a branch that will be
2623 hinted
2624 - the predecessor block ends in an insn that invalidates
2625 the hint */
2626 if (prop
2627 && prop->index >= 0
2628 && (bbend = BB_END (prop))
2629 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2630 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2631 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2633 if (dump_file)
2634 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2635 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2636 bb->index, prop->index, bb_loop_depth (bb),
2637 INSN_UID (branch), loop_exit, simple_loop,
2638 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2640 spu_bb_info[prop->index].prop_jump = branch;
2641 spu_bb_info[prop->index].bb_index = i;
2643 else if (branch_addr - next_addr >= required_dist)
2645 if (dump_file)
2646 fprintf (dump_file, "hint for %i in block %i before %i\n",
2647 INSN_UID (branch), bb->index,
2648 INSN_UID (NEXT_INSN (insn)));
2649 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2650 branch_addr - next_addr, blocks);
2652 branch = 0;
2655 free (spu_bb_info);
2657 if (!bitmap_empty_p (blocks))
2658 find_many_sub_basic_blocks (blocks);
2660 /* We have to schedule to make sure alignment is ok. */
2661 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2663 /* The hints need to be scheduled, so run the scheduler again. */
2664 schedule_insns ();
2665 df_finish_pass (true);
2667 insert_hbrp ();
2669 pad_bb ();
2671 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2672 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2674 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2675 between its branch label and the branch. We don't move the
2676 label because GCC expects it at the beginning of the block. */
2677 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2678 rtx label_ref = XVECEXP (unspec, 0, 0);
2679 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2680 rtx_insn *branch;
2681 int offset = 0;
2682 for (branch = NEXT_INSN (label);
2683 !JUMP_P (branch) && !CALL_P (branch);
2684 branch = NEXT_INSN (branch))
2685 if (NONJUMP_INSN_P (branch))
2686 offset += get_attr_length (branch);
2687 if (offset > 0)
2688 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2691 spu_var_tracking ();
2693 loop_optimizer_finalize ();
2695 free_bb_for_insn ();
2697 in_spu_reorg = 0;
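/* As a sketch of the propagation above: for a single-block loop whose
   backward branch sits only a few insns after the block's start, the
   hint cannot be placed spu_hint_dist bytes ahead of the branch inside
   the block itself.  prop_jump therefore records the branch against
   the fallthru predecessor, and when the outer loop reaches that
   predecessor the hint is emitted there, giving it enough lead time
   before the loop body executes.  */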
2701 /* Insn scheduling routines, primarily for dual issue. */
2702 static int
2703 spu_sched_issue_rate (void)
2705 return 2;
2708 static int
2709 uses_ls_unit(rtx_insn *insn)
2711 rtx set = single_set (insn);
2712 if (set != 0
2713 && (GET_CODE (SET_DEST (set)) == MEM
2714 || GET_CODE (SET_SRC (set)) == MEM))
2715 return 1;
2716 return 0;
2719 static int
2720 get_pipe (rtx_insn *insn)
2722 enum attr_type t;
2723 /* Handle inline asm */
2724 if (INSN_CODE (insn) == -1)
2725 return -1;
2726 t = get_attr_type (insn);
2727 switch (t)
2729 case TYPE_CONVERT:
2730 return -2;
2731 case TYPE_MULTI0:
2732 return -1;
2734 case TYPE_FX2:
2735 case TYPE_FX3:
2736 case TYPE_SPR:
2737 case TYPE_NOP:
2738 case TYPE_FXB:
2739 case TYPE_FPD:
2740 case TYPE_FP6:
2741 case TYPE_FP7:
2742 return 0;
2744 case TYPE_LNOP:
2745 case TYPE_SHUF:
2746 case TYPE_LOAD:
2747 case TYPE_STORE:
2748 case TYPE_BR:
2749 case TYPE_MULTI1:
2750 case TYPE_HBR:
2751 case TYPE_IPREFETCH:
2752 return 1;
2753 default:
2754 abort ();
2759 /* haifa-sched.c has a static variable that keeps track of the current
2760 cycle. It is passed to spu_sched_reorder, and we record it here for
2761 use by spu_sched_variable_issue. It won't be accurate if the
2762 scheduler updates its clock_var between the two calls. */
2763 static int clock_var;
2765 /* This is used to keep track of insn alignment. Set to 0 at the
2766 beginning of each block and increased by the "length" attr of each
2767 insn scheduled. */
2768 static int spu_sched_length;
2770 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2771 ready list appropriately in spu_sched_reorder(). */
2772 static int pipe0_clock;
2773 static int pipe1_clock;
2775 static int prev_clock_var;
2777 static int prev_priority;
2779 /* The SPU needs to load the next ilb sometime during the execution of
2780 the previous ilb. There is a potential conflict if every cycle has a
2781 load or store. To avoid the conflict we make sure the load/store
2782 unit is free for at least one cycle during the execution of insns in
2783 the previous ilb. */
2784 static int spu_ls_first;
2785 static int prev_ls_clock;
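/* A compiled-out sketch of how these counters interact (hypothetical
   byte counts): spu_ls_first records where an unbroken run of
   load/store cycles began, and once that run reaches 15 insns while
   yet another load/store is ready, spu_sched_reorder below emits an
   hbrp (iprefetch) instead so the ilb fetch gets a free load/store
   cycle.  */
#if 0
static int
ls_run_too_long_example (int sched_length, int ls_first)
{
  /* Mirrors the "spu_sched_length - spu_ls_first >= 4 * 15" test in
     spu_sched_reorder; e.g. 100 - 40 == 60 triggers the hbrp.  */
  return sched_length - ls_first >= 4 * 15;
}
#endif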
2787 static void
2788 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2789 int max_ready ATTRIBUTE_UNUSED)
2791 spu_sched_length = 0;
2794 static void
2795 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2796 int max_ready ATTRIBUTE_UNUSED)
2798 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2800 /* When any block might be at least 8-byte aligned, assume they
2801 will all be at least 8-byte aligned to make sure dual issue
2802 works out correctly. */
2803 spu_sched_length = 0;
2805 spu_ls_first = INT_MAX;
2806 clock_var = -1;
2807 prev_ls_clock = -1;
2808 pipe0_clock = -1;
2809 pipe1_clock = -1;
2810 prev_clock_var = -1;
2811 prev_priority = -1;
2814 static int
2815 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2816 int verbose ATTRIBUTE_UNUSED,
2817 rtx_insn *insn, int more)
2819 int len;
2820 int p;
2821 if (GET_CODE (PATTERN (insn)) == USE
2822 || GET_CODE (PATTERN (insn)) == CLOBBER
2823 || (len = get_attr_length (insn)) == 0)
2824 return more;
2826 spu_sched_length += len;
2828 /* Reset on inline asm */
2829 if (INSN_CODE (insn) == -1)
2831 spu_ls_first = INT_MAX;
2832 pipe0_clock = -1;
2833 pipe1_clock = -1;
2834 return 0;
2836 p = get_pipe (insn);
2837 if (p == 0)
2838 pipe0_clock = clock_var;
2839 else
2840 pipe1_clock = clock_var;
2842 if (in_spu_reorg)
2844 if (clock_var - prev_ls_clock > 1
2845 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2846 spu_ls_first = INT_MAX;
2847 if (uses_ls_unit (insn))
2849 if (spu_ls_first == INT_MAX)
2850 spu_ls_first = spu_sched_length;
2851 prev_ls_clock = clock_var;
2854 /* The scheduler hasn't inserted the nop, but we will later on.
2855 Include those nops in spu_sched_length. */
2856 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2857 spu_sched_length += 4;
2858 prev_clock_var = clock_var;
2860 /* more is -1 when called from spu_sched_reorder for new insns
2861 that don't have INSN_PRIORITY */
2862 if (more >= 0)
2863 prev_priority = INSN_PRIORITY (insn);
2866 /* Always try issuing more insns. spu_sched_reorder will decide
2867 when the cycle should be advanced. */
2868 return 1;
2871 /* This function is called for both TARGET_SCHED_REORDER and
2872 TARGET_SCHED_REORDER2. */
2873 static int
2874 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2875 rtx_insn **ready, int *nreadyp, int clock)
2877 int i, nready = *nreadyp;
2878 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2879 rtx_insn *insn;
2881 clock_var = clock;
2883 if (nready <= 0 || pipe1_clock >= clock)
2884 return 0;
2886 /* Find any rtl insns that don't generate assembly insns and schedule
2887 them first. */
2888 for (i = nready - 1; i >= 0; i--)
2890 insn = ready[i];
2891 if (INSN_CODE (insn) == -1
2892 || INSN_CODE (insn) == CODE_FOR_blockage
2893 || (INSN_P (insn) && get_attr_length (insn) == 0))
2895 ready[i] = ready[nready - 1];
2896 ready[nready - 1] = insn;
2897 return 1;
2901 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2902 for (i = 0; i < nready; i++)
2903 if (INSN_CODE (ready[i]) != -1)
2905 insn = ready[i];
2906 switch (get_attr_type (insn))
2908 default:
2909 case TYPE_MULTI0:
2910 case TYPE_CONVERT:
2911 case TYPE_FX2:
2912 case TYPE_FX3:
2913 case TYPE_SPR:
2914 case TYPE_NOP:
2915 case TYPE_FXB:
2916 case TYPE_FPD:
2917 case TYPE_FP6:
2918 case TYPE_FP7:
2919 pipe_0 = i;
2920 break;
2921 case TYPE_LOAD:
2922 case TYPE_STORE:
2923 pipe_ls = i;
2924 case TYPE_LNOP:
2925 case TYPE_SHUF:
2926 case TYPE_BR:
2927 case TYPE_MULTI1:
2928 case TYPE_HBR:
2929 pipe_1 = i;
2930 break;
2931 case TYPE_IPREFETCH:
2932 pipe_hbrp = i;
2933 break;
2937 /* In the first scheduling phase, schedule loads and stores together
2938 to increase the chance they will get merged during postreload CSE. */
2939 if (!reload_completed && pipe_ls >= 0)
2941 insn = ready[pipe_ls];
2942 ready[pipe_ls] = ready[nready - 1];
2943 ready[nready - 1] = insn;
2944 return 1;
2947 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2948 if (pipe_hbrp >= 0)
2949 pipe_1 = pipe_hbrp;
2951 /* When we have loads/stores in every cycle of the last 15 insns and
2952 we are about to schedule another load/store, emit an hbrp insn
2953 instead. */
2954 if (in_spu_reorg
2955 && spu_sched_length - spu_ls_first >= 4 * 15
2956 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2958 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2959 recog_memoized (insn);
2960 if (pipe0_clock < clock)
2961 PUT_MODE (insn, TImode);
2962 spu_sched_variable_issue (file, verbose, insn, -1);
2963 return 0;
2966 /* In general, we want to emit nops to increase dual issue, but dual
2967 issue isn't faster when one of the insns could be scheduled later
2968 without affecting the critical path. We look at INSN_PRIORITY to
2969 make a good guess, but it isn't perfect so -mdual-nops=n can be
2970 used to tune it. */
2971 if (in_spu_reorg && spu_dual_nops < 10)
2973 /* When we are at an even address and we are not issuing nops to
2974 improve scheduling then we need to advance the cycle. */
2975 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2976 && (spu_dual_nops == 0
2977 || (pipe_1 != -1
2978 && prev_priority >
2979 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2980 return 0;
2982 /* When at an odd address, schedule the highest priority insn
2983 without considering pipeline. */
2984 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2985 && (spu_dual_nops == 0
2986 || (prev_priority >
2987 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2988 return 1;
2992 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2993 pipe0 insn in the ready list, schedule it. */
2994 if (pipe0_clock < clock && pipe_0 >= 0)
2995 schedule_i = pipe_0;
2997 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2998 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2999 else
3000 schedule_i = pipe_1;
3002 if (schedule_i > -1)
3004 insn = ready[schedule_i];
3005 ready[schedule_i] = ready[nready - 1];
3006 ready[nready - 1] = insn;
3007 return 1;
3009 return 0;
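/* Worked numbers for the -mdual-nops heuristic above: with
   spu_dual_nops == 2, a previous priority of 10 and a best pipe-1
   candidate of priority 5, the test "10 > 5 + 2" holds and the cycle
   is advanced rather than padded with a nop; if the candidate's
   priority were 9, "10 > 9 + 2" fails and the scheduler keeps trying
   to pair the two insns.  With -mdual-nops=0 the cycle is always
   advanced here, so no nops are ever added just to force dual issue.  */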
3012 /* INSN is dependent on DEP_INSN. */
3013 static int
3014 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3016 rtx set;
3018 /* The blockage pattern is used to prevent instructions from being
3019 moved across it and has no cost. */
3020 if (INSN_CODE (insn) == CODE_FOR_blockage
3021 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3022 return 0;
3024 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3025 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3026 return 0;
3028 /* Make sure hbrps are spread out. */
3029 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3030 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3031 return 8;
3033 /* Make sure hints and hbrps are 2 cycles apart. */
3034 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3035 || INSN_CODE (insn) == CODE_FOR_hbr)
3036 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3037 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3038 return 2;
3040 /* An hbrp has no real dependency on other insns. */
3041 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3042 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3043 return 0;
3045 /* Assuming that it is unlikely an argument register will be used in
3046 the first cycle of the called function, we reduce the cost for
3047 slightly better scheduling of dep_insn. When not hinted, the
3048 mispredicted branch would hide the cost as well. */
3049 if (CALL_P (insn))
3051 rtx target = get_branch_target (insn);
3052 if (GET_CODE (target) != REG || !set_of (target, insn))
3053 return cost - 2;
3054 return cost;
3057 /* And when returning from a function, let's assume the return values
3058 are completed sooner too. */
3059 if (CALL_P (dep_insn))
3060 return cost - 2;
3062 /* Make sure an instruction that loads from the back chain is scheduled
3063 away from the return instruction so a hint is more likely to get
3064 issued. */
3065 if (INSN_CODE (insn) == CODE_FOR__return
3066 && (set = single_set (dep_insn))
3067 && GET_CODE (SET_DEST (set)) == REG
3068 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3069 return 20;
3071 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3072 scheduler makes every insn in a block anti-dependent on the final
3073 jump_insn. We adjust here so higher cost insns will get scheduled
3074 earlier. */
3075 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3076 return insn_cost (dep_insn) - 3;
3078 return cost;
3081 /* Create a CONST_DOUBLE from a string. */
3083 spu_float_const (const char *string, machine_mode mode)
3085 REAL_VALUE_TYPE value;
3086 value = REAL_VALUE_ATOF (string, mode);
3087 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3091 spu_constant_address_p (rtx x)
3093 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3094 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3095 || GET_CODE (x) == HIGH);
3098 static enum spu_immediate
3099 which_immediate_load (HOST_WIDE_INT val)
3101 gcc_assert (val == trunc_int_for_mode (val, SImode));
3103 if (val >= -0x8000 && val <= 0x7fff)
3104 return SPU_IL;
3105 if (val >= 0 && val <= 0x3ffff)
3106 return SPU_ILA;
3107 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3108 return SPU_ILH;
3109 if ((val & 0xffff) == 0)
3110 return SPU_ILHU;
3112 return SPU_NONE;
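/* Worked examples for the classification above:
     0x00001234  ->  SPU_IL    (fits a signed 16-bit immediate)
     0x00023456  ->  SPU_ILA   (fits the unsigned 18-bit ila field)
     0x12341234  ->  SPU_ILH   (both halfwords identical)
     0x12340000  ->  SPU_ILHU  (low halfword zero)
     0x12345678  ->  SPU_NONE  (needs an ilhu/iohl pair instead).  */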
3115 /* Return true when OP can be loaded by one of the il instructions, or
3116 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3118 immediate_load_p (rtx op, machine_mode mode)
3120 if (CONSTANT_P (op))
3122 enum immediate_class c = classify_immediate (op, mode);
3123 return c == IC_IL1 || c == IC_IL1s
3124 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3126 return 0;
3129 /* Return true if the first SIZE bytes of ARR are a constant that can be
3130 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3131 represent the size and offset of the instruction to use. */
3132 static int
3133 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3135 int cpat, run, i, start;
3136 cpat = 1;
3137 run = 0;
3138 start = -1;
3139 for (i = 0; i < size && cpat; i++)
3140 if (arr[i] != i+16)
3142 if (!run)
3144 start = i;
3145 if (arr[i] == 3)
3146 run = 1;
3147 else if (arr[i] == 2 && arr[i+1] == 3)
3148 run = 2;
3149 else if (arr[i] == 0)
3151 while (arr[i+run] == run && i+run < 16)
3152 run++;
3153 if (run != 4 && run != 8)
3154 cpat = 0;
3156 else
3157 cpat = 0;
3158 if ((i & (run-1)) != 0)
3159 cpat = 0;
3160 i += run;
3162 else
3163 cpat = 0;
3165 if (cpat && (run || size < 16))
3167 if (run == 0)
3168 run = 1;
3169 if (prun)
3170 *prun = run;
3171 if (pstart)
3172 *pstart = start == -1 ? 16-run : start;
3173 return 1;
3175 return 0;
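/* A compiled-out example for cpat_info: the control pattern for
   inserting a word at byte offset 4 keeps the identity bytes 0x10 + i
   everywhere except bytes 4..7, which hold 0..3.  */
#if 0
static void
cpat_info_example (void)
{
  unsigned char arr[16] = { 0x10, 0x11, 0x12, 0x13,   /* identity bytes */
                            0x00, 0x01, 0x02, 0x03,   /* insertion slot */
                            0x18, 0x19, 0x1a, 0x1b,
                            0x1c, 0x1d, 0x1e, 0x1f };
  int run, start;
  /* Returns 1 with run == 4 and start == 4, i.e. the constant can be
     generated with cwd at offset 4 instead of a constant-pool load.  */
  cpat_info (arr, 16, &run, &start);
}
#endif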
3178 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3179 it into a register. MODE is only valid when OP is a CONST_INT. */
3180 static enum immediate_class
3181 classify_immediate (rtx op, machine_mode mode)
3183 HOST_WIDE_INT val;
3184 unsigned char arr[16];
3185 int i, j, repeated, fsmbi, repeat;
3187 gcc_assert (CONSTANT_P (op));
3189 if (GET_MODE (op) != VOIDmode)
3190 mode = GET_MODE (op);
3192 /* A V4SI const_vector with all identical symbols is ok. */
3193 if (!flag_pic
3194 && mode == V4SImode
3195 && GET_CODE (op) == CONST_VECTOR
3196 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3197 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3198 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3199 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3200 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3201 op = CONST_VECTOR_ELT (op, 0);
3203 switch (GET_CODE (op))
3205 case SYMBOL_REF:
3206 case LABEL_REF:
3207 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3209 case CONST:
3210 /* We can never know if the resulting address fits in 18 bits and can be
3211 loaded with ila. For now, assume the address will not overflow if
3212 the displacement is "small" (fits 'K' constraint). */
3213 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3215 rtx sym = XEXP (XEXP (op, 0), 0);
3216 rtx cst = XEXP (XEXP (op, 0), 1);
3218 if (GET_CODE (sym) == SYMBOL_REF
3219 && GET_CODE (cst) == CONST_INT
3220 && satisfies_constraint_K (cst))
3221 return IC_IL1s;
3223 return IC_IL2s;
3225 case HIGH:
3226 return IC_IL1s;
3228 case CONST_VECTOR:
3229 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3230 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3231 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3232 return IC_POOL;
3233 /* Fall through. */
3235 case CONST_INT:
3236 case CONST_DOUBLE:
3237 constant_to_array (mode, op, arr);
3239 /* Check that each 4-byte slot is identical. */
3240 repeated = 1;
3241 for (i = 4; i < 16; i += 4)
3242 for (j = 0; j < 4; j++)
3243 if (arr[j] != arr[i + j])
3244 repeated = 0;
3246 if (repeated)
3248 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3249 val = trunc_int_for_mode (val, SImode);
3251 if (which_immediate_load (val) != SPU_NONE)
3252 return IC_IL1;
3255 /* Any mode of 2 bytes or smaller can be loaded with an il
3256 instruction. */
3257 gcc_assert (GET_MODE_SIZE (mode) > 2);
3259 fsmbi = 1;
3260 repeat = 0;
3261 for (i = 0; i < 16 && fsmbi; i++)
3262 if (arr[i] != 0 && repeat == 0)
3263 repeat = arr[i];
3264 else if (arr[i] != 0 && arr[i] != repeat)
3265 fsmbi = 0;
3266 if (fsmbi)
3267 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3269 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3270 return IC_CPAT;
3272 if (repeated)
3273 return IC_IL2;
3275 return IC_POOL;
3276 default:
3277 break;
3279 gcc_unreachable ();
3282 static enum spu_immediate
3283 which_logical_immediate (HOST_WIDE_INT val)
3285 gcc_assert (val == trunc_int_for_mode (val, SImode));
3287 if (val >= -0x200 && val <= 0x1ff)
3288 return SPU_ORI;
3289 if (val >= 0 && val <= 0xffff)
3290 return SPU_IOHL;
3291 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3293 val = trunc_int_for_mode (val, HImode);
3294 if (val >= -0x200 && val <= 0x1ff)
3295 return SPU_ORHI;
3296 if ((val & 0xff) == ((val >> 8) & 0xff))
3298 val = trunc_int_for_mode (val, QImode);
3299 if (val >= -0x200 && val <= 0x1ff)
3300 return SPU_ORBI;
3303 return SPU_NONE;
3306 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3307 CONST_DOUBLEs. */
3308 static int
3309 const_vector_immediate_p (rtx x)
3311 int i;
3312 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3313 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3314 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3315 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3316 return 0;
3317 return 1;
3321 logical_immediate_p (rtx op, machine_mode mode)
3323 HOST_WIDE_INT val;
3324 unsigned char arr[16];
3325 int i, j;
3327 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3328 || GET_CODE (op) == CONST_VECTOR);
3330 if (GET_CODE (op) == CONST_VECTOR
3331 && !const_vector_immediate_p (op))
3332 return 0;
3334 if (GET_MODE (op) != VOIDmode)
3335 mode = GET_MODE (op);
3337 constant_to_array (mode, op, arr);
3339 /* Check that bytes are repeated. */
3340 for (i = 4; i < 16; i += 4)
3341 for (j = 0; j < 4; j++)
3342 if (arr[j] != arr[i + j])
3343 return 0;
3345 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3346 val = trunc_int_for_mode (val, SImode);
3348 i = which_logical_immediate (val);
3349 return i != SPU_NONE && i != SPU_IOHL;
3353 iohl_immediate_p (rtx op, machine_mode mode)
3355 HOST_WIDE_INT val;
3356 unsigned char arr[16];
3357 int i, j;
3359 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3360 || GET_CODE (op) == CONST_VECTOR);
3362 if (GET_CODE (op) == CONST_VECTOR
3363 && !const_vector_immediate_p (op))
3364 return 0;
3366 if (GET_MODE (op) != VOIDmode)
3367 mode = GET_MODE (op);
3369 constant_to_array (mode, op, arr);
3371 /* Check that bytes are repeated. */
3372 for (i = 4; i < 16; i += 4)
3373 for (j = 0; j < 4; j++)
3374 if (arr[j] != arr[i + j])
3375 return 0;
3377 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3378 val = trunc_int_for_mode (val, SImode);
3380 return val >= 0 && val <= 0xffff;
3384 arith_immediate_p (rtx op, machine_mode mode,
3385 HOST_WIDE_INT low, HOST_WIDE_INT high)
3387 HOST_WIDE_INT val;
3388 unsigned char arr[16];
3389 int bytes, i, j;
3391 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3392 || GET_CODE (op) == CONST_VECTOR);
3394 if (GET_CODE (op) == CONST_VECTOR
3395 && !const_vector_immediate_p (op))
3396 return 0;
3398 if (GET_MODE (op) != VOIDmode)
3399 mode = GET_MODE (op);
3401 constant_to_array (mode, op, arr);
3403 if (VECTOR_MODE_P (mode))
3404 mode = GET_MODE_INNER (mode);
3406 bytes = GET_MODE_SIZE (mode);
3407 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3409 /* Check that bytes are repeated. */
3410 for (i = bytes; i < 16; i += bytes)
3411 for (j = 0; j < bytes; j++)
3412 if (arr[j] != arr[i + j])
3413 return 0;
3415 val = arr[0];
3416 for (j = 1; j < bytes; j++)
3417 val = (val << 8) | arr[j];
3419 val = trunc_int_for_mode (val, mode);
3421 return val >= low && val <= high;
3424 /* TRUE when op is an immediate and an exact power of 2, and given that
3425 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3426 all entries must be the same. */
3427 bool
3428 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3430 machine_mode int_mode;
3431 HOST_WIDE_INT val;
3432 unsigned char arr[16];
3433 int bytes, i, j;
3435 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3436 || GET_CODE (op) == CONST_VECTOR);
3438 if (GET_CODE (op) == CONST_VECTOR
3439 && !const_vector_immediate_p (op))
3440 return 0;
3442 if (GET_MODE (op) != VOIDmode)
3443 mode = GET_MODE (op);
3445 constant_to_array (mode, op, arr);
3447 if (VECTOR_MODE_P (mode))
3448 mode = GET_MODE_INNER (mode);
3450 bytes = GET_MODE_SIZE (mode);
3451 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3453 /* Check that bytes are repeated. */
3454 for (i = bytes; i < 16; i += bytes)
3455 for (j = 0; j < bytes; j++)
3456 if (arr[j] != arr[i + j])
3457 return 0;
3459 val = arr[0];
3460 for (j = 1; j < bytes; j++)
3461 val = (val << 8) | arr[j];
3463 val = trunc_int_for_mode (val, int_mode);
3465 /* Currently, we only handle SFmode. */
3466 gcc_assert (mode == SFmode);
3467 if (mode == SFmode)
3469 int exp = (val >> 23) - 127;
3470 return val > 0 && (val & 0x007fffff) == 0
3471 && exp >= low && exp <= high;
3473 return FALSE;
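/* For SFmode the test above just inspects the IEEE single-precision
   bits.  For example 8.0f is 0x41000000: the mantissa field
   (val & 0x007fffff) is zero and the exponent is
   (0x41000000 >> 23) - 127 == 130 - 127 == 3, so the constant is
   accepted whenever 3 lies in [LOW, HIGH].  */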
3476 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3478 static bool
3479 ea_symbol_ref_p (const_rtx x)
3481 tree decl;
3483 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3485 rtx plus = XEXP (x, 0);
3486 rtx op0 = XEXP (plus, 0);
3487 rtx op1 = XEXP (plus, 1);
3488 if (GET_CODE (op1) == CONST_INT)
3489 x = op0;
3492 return (GET_CODE (x) == SYMBOL_REF
3493 && (decl = SYMBOL_REF_DECL (x)) != 0
3494 && TREE_CODE (decl) == VAR_DECL
3495 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3498 /* We accept:
3499 - any 32-bit constant (SImode, SFmode)
3500 - any constant that can be generated with fsmbi (any mode)
3501 - a 64-bit constant where the high and low bits are identical
3502 (DImode, DFmode)
3503 - a 128-bit constant where the four 32-bit words match. */
3504 bool
3505 spu_legitimate_constant_p (machine_mode mode, rtx x)
3507 subrtx_iterator::array_type array;
3508 if (GET_CODE (x) == HIGH)
3509 x = XEXP (x, 0);
3511 /* Reject any __ea qualified reference. These can't appear in
3512 instructions but must be forced to the constant pool. */
3513 FOR_EACH_SUBRTX (iter, array, x, ALL)
3514 if (ea_symbol_ref_p (*iter))
3515 return 0;
3517 /* V4SI with all identical symbols is valid. */
3518 if (!flag_pic
3519 && mode == V4SImode
3520 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3521 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3522 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3523 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3524 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3525 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3527 if (GET_CODE (x) == CONST_VECTOR
3528 && !const_vector_immediate_p (x))
3529 return 0;
3530 return 1;
3533 /* Valid addresses are:
3534 - symbol_ref, label_ref, const
3535 - reg
3536 - reg + const_int, where const_int is 16 byte aligned
3537 - reg + reg, alignment doesn't matter
3538 The alignment matters in the reg+const case because lqd and stqd
3539 ignore the 4 least significant bits of the const. We only care about
3540 16 byte modes because the expand phase will change all smaller MEM
3541 references to TImode. */
3542 static bool
3543 spu_legitimate_address_p (machine_mode mode,
3544 rtx x, bool reg_ok_strict)
3546 int aligned = GET_MODE_SIZE (mode) >= 16;
3547 if (aligned
3548 && GET_CODE (x) == AND
3549 && GET_CODE (XEXP (x, 1)) == CONST_INT
3550 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3551 x = XEXP (x, 0);
3552 switch (GET_CODE (x))
3554 case LABEL_REF:
3555 return !TARGET_LARGE_MEM;
3557 case SYMBOL_REF:
3558 case CONST:
3559 /* Keep __ea references until reload so that spu_expand_mov can see them
3560 in MEMs. */
3561 if (ea_symbol_ref_p (x))
3562 return !reload_in_progress && !reload_completed;
3563 return !TARGET_LARGE_MEM;
3565 case CONST_INT:
3566 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3568 case SUBREG:
3569 x = XEXP (x, 0);
3570 if (REG_P (x))
3571 return 0;
3573 case REG:
3574 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3576 case PLUS:
3577 case LO_SUM:
3579 rtx op0 = XEXP (x, 0);
3580 rtx op1 = XEXP (x, 1);
3581 if (GET_CODE (op0) == SUBREG)
3582 op0 = XEXP (op0, 0);
3583 if (GET_CODE (op1) == SUBREG)
3584 op1 = XEXP (op1, 0);
3585 if (GET_CODE (op0) == REG
3586 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3587 && GET_CODE (op1) == CONST_INT
3588 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3589 /* If virtual registers are involved, the displacement will
3590 change later on anyway, so checking would be premature.
3591 Reload will make sure the final displacement after
3592 register elimination is OK. */
3593 || op0 == arg_pointer_rtx
3594 || op0 == frame_pointer_rtx
3595 || op0 == virtual_stack_vars_rtx)
3596 && (!aligned || (INTVAL (op1) & 15) == 0))
3597 return TRUE;
3598 if (GET_CODE (op0) == REG
3599 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3600 && GET_CODE (op1) == REG
3601 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3602 return TRUE;
3604 break;
3606 default:
3607 break;
3609 return FALSE;
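/* Some concrete cases for the predicate above, using a TImode
   (16-byte) access so the "aligned" path is taken:
     (plus (reg) (const_int 32))  accepted: 32 is inside the
                                  -0x2000..0x1fff d-form range and
                                  32 & 15 == 0
     (plus (reg) (const_int 8))   rejected: lqd/stqd would silently
                                  drop the low 4 bits of the offset
     (plus (reg) (reg))           accepted: x-form addressing does not
                                  care about alignment
     (const_int 0x3fff0)          accepted: an absolute local-store
                                  address in the 0..0x3ffff range.  */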
3612 /* Like spu_legitimate_address_p, except with named addresses. */
3613 static bool
3614 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3615 bool reg_ok_strict, addr_space_t as)
3617 if (as == ADDR_SPACE_EA)
3618 return (REG_P (x) && (GET_MODE (x) == EAmode));
3620 else if (as != ADDR_SPACE_GENERIC)
3621 gcc_unreachable ();
3623 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3626 /* When the address is reg + const_int, force the const_int into a
3627 register. */
3628 static rtx
3629 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3630 machine_mode mode ATTRIBUTE_UNUSED)
3632 rtx op0, op1;
3633 /* Make sure both operands are registers. */
3634 if (GET_CODE (x) == PLUS)
3636 op0 = XEXP (x, 0);
3637 op1 = XEXP (x, 1);
3638 if (ALIGNED_SYMBOL_REF_P (op0))
3640 op0 = force_reg (Pmode, op0);
3641 mark_reg_pointer (op0, 128);
3643 else if (GET_CODE (op0) != REG)
3644 op0 = force_reg (Pmode, op0);
3645 if (ALIGNED_SYMBOL_REF_P (op1))
3647 op1 = force_reg (Pmode, op1);
3648 mark_reg_pointer (op1, 128);
3650 else if (GET_CODE (op1) != REG)
3651 op1 = force_reg (Pmode, op1);
3652 x = gen_rtx_PLUS (Pmode, op0, op1);
3654 return x;
3657 /* Like spu_legitimize_address, except with named address support. */
3658 static rtx
3659 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3660 addr_space_t as)
3662 if (as != ADDR_SPACE_GENERIC)
3663 return x;
3665 return spu_legitimize_address (x, oldx, mode);
3668 /* Reload reg + const_int for out-of-range displacements. */
3670 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3671 int opnum, int type)
3673 bool removed_and = false;
3675 if (GET_CODE (ad) == AND
3676 && CONST_INT_P (XEXP (ad, 1))
3677 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3679 ad = XEXP (ad, 0);
3680 removed_and = true;
3683 if (GET_CODE (ad) == PLUS
3684 && REG_P (XEXP (ad, 0))
3685 && CONST_INT_P (XEXP (ad, 1))
3686 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3687 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3689 /* Unshare the sum. */
3690 ad = copy_rtx (ad);
3692 /* Reload the displacement. */
3693 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3694 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3695 opnum, (enum reload_type) type);
3697 /* Add back AND for alignment if we stripped it. */
3698 if (removed_and)
3699 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3701 return ad;
3704 return NULL_RTX;
3707 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3708 struct attribute_spec.handler. */
3709 static tree
3710 spu_handle_fndecl_attribute (tree * node,
3711 tree name,
3712 tree args ATTRIBUTE_UNUSED,
3713 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3715 if (TREE_CODE (*node) != FUNCTION_DECL)
3717 warning (0, "%qE attribute only applies to functions",
3718 name);
3719 *no_add_attrs = true;
3722 return NULL_TREE;
3725 /* Handle the "vector" attribute. */
3726 static tree
3727 spu_handle_vector_attribute (tree * node, tree name,
3728 tree args ATTRIBUTE_UNUSED,
3729 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3731 tree type = *node, result = NULL_TREE;
3732 machine_mode mode;
3733 int unsigned_p;
3735 while (POINTER_TYPE_P (type)
3736 || TREE_CODE (type) == FUNCTION_TYPE
3737 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3738 type = TREE_TYPE (type);
3740 mode = TYPE_MODE (type);
3742 unsigned_p = TYPE_UNSIGNED (type);
3743 switch (mode)
3745 case DImode:
3746 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3747 break;
3748 case SImode:
3749 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3750 break;
3751 case HImode:
3752 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3753 break;
3754 case QImode:
3755 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3756 break;
3757 case SFmode:
3758 result = V4SF_type_node;
3759 break;
3760 case DFmode:
3761 result = V2DF_type_node;
3762 break;
3763 default:
3764 break;
3767 /* Propagate qualifiers attached to the element type
3768 onto the vector type. */
3769 if (result && result != type && TYPE_QUALS (type))
3770 result = build_qualified_type (result, TYPE_QUALS (type));
3772 *no_add_attrs = true; /* No need to hang on to the attribute. */
3774 if (!result)
3775 warning (0, "%qE attribute ignored", name);
3776 else
3777 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3779 return NULL_TREE;
3782 /* Return nonzero if FUNC is a naked function. */
3783 static int
3784 spu_naked_function_p (tree func)
3786 tree a;
3788 if (TREE_CODE (func) != FUNCTION_DECL)
3789 abort ();
3791 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3792 return a != NULL_TREE;
3796 spu_initial_elimination_offset (int from, int to)
3798 int saved_regs_size = spu_saved_regs_size ();
3799 int sp_offset = 0;
3800 if (!crtl->is_leaf || crtl->outgoing_args_size
3801 || get_frame_size () || saved_regs_size)
3802 sp_offset = STACK_POINTER_OFFSET;
3803 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3804 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3805 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3806 return get_frame_size ();
3807 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3808 return sp_offset + crtl->outgoing_args_size
3809 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3810 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3811 return get_frame_size () + saved_regs_size + sp_offset;
3812 else
3813 gcc_unreachable ();
3817 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3819 machine_mode mode = TYPE_MODE (type);
3820 int byte_size = ((mode == BLKmode)
3821 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3823 /* Make sure small structs are left justified in a register. */
3824 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3825 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3827 machine_mode smode;
3828 rtvec v;
3829 int i;
3830 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3831 int n = byte_size / UNITS_PER_WORD;
3832 v = rtvec_alloc (nregs);
3833 for (i = 0; i < n; i++)
3835 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3836 gen_rtx_REG (TImode,
3837 FIRST_RETURN_REGNUM
3838 + i),
3839 GEN_INT (UNITS_PER_WORD * i));
3840 byte_size -= UNITS_PER_WORD;
3843 if (n < nregs)
3845 if (byte_size < 4)
3846 byte_size = 4;
3847 smode =
3848 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3849 RTVEC_ELT (v, n) =
3850 gen_rtx_EXPR_LIST (VOIDmode,
3851 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3852 GEN_INT (UNITS_PER_WORD * n));
3854 return gen_rtx_PARALLEL (mode, v);
3856 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3859 static rtx
3860 spu_function_arg (cumulative_args_t cum_v,
3861 machine_mode mode,
3862 const_tree type, bool named ATTRIBUTE_UNUSED)
3864 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3865 int byte_size;
3867 if (*cum >= MAX_REGISTER_ARGS)
3868 return 0;
3870 byte_size = ((mode == BLKmode)
3871 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3873 /* The ABI does not allow parameters to be passed partially in a
3874 register and partially on the stack. */
3875 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3876 return 0;
3878 /* Make sure small structs are left justified in a register. */
3879 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3880 && byte_size < UNITS_PER_WORD && byte_size > 0)
3882 machine_mode smode;
3883 rtx gr_reg;
3884 if (byte_size < 4)
3885 byte_size = 4;
3886 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3887 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3888 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3889 const0_rtx);
3890 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3892 else
3893 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3896 static void
3897 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3898 const_tree type, bool named ATTRIBUTE_UNUSED)
3900 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3902 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3904 : mode == BLKmode
3905 ? ((int_size_in_bytes (type) + 15) / 16)
3906 : mode == VOIDmode
3908 : HARD_REGNO_NREGS (cum, mode));
3911 /* Variable sized types are passed by reference. */
3912 static bool
3913 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3914 machine_mode mode ATTRIBUTE_UNUSED,
3915 const_tree type, bool named ATTRIBUTE_UNUSED)
3917 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3921 /* Var args. */
3923 /* Create and return the va_list datatype.
3925 On SPU, va_list is an array type equivalent to
3927 typedef struct __va_list_tag
3929 void *__args __attribute__((__aligned(16)));
3930 void *__skip __attribute__((__aligned(16)));
3932 } va_list[1];
3934 where __args points to the arg that will be returned by the next
3935 va_arg(), and __skip points to the previous stack frame such that
3936 when __args == __skip we should advance __args by 32 bytes. */
3937 static tree
3938 spu_build_builtin_va_list (void)
3940 tree f_args, f_skip, record, type_decl;
3941 bool owp;
3943 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3945 type_decl =
3946 build_decl (BUILTINS_LOCATION,
3947 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3949 f_args = build_decl (BUILTINS_LOCATION,
3950 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3951 f_skip = build_decl (BUILTINS_LOCATION,
3952 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3954 DECL_FIELD_CONTEXT (f_args) = record;
3955 DECL_ALIGN (f_args) = 128;
3956 DECL_USER_ALIGN (f_args) = 1;
3958 DECL_FIELD_CONTEXT (f_skip) = record;
3959 DECL_ALIGN (f_skip) = 128;
3960 DECL_USER_ALIGN (f_skip) = 1;
3962 TYPE_STUB_DECL (record) = type_decl;
3963 TYPE_NAME (record) = type_decl;
3964 TYPE_FIELDS (record) = f_args;
3965 DECL_CHAIN (f_args) = f_skip;
3967 /* We know this is being padded and we want it that way. It is an
3968 internal type, so hide the warnings from the user. */
3969 owp = warn_padded;
3970 warn_padded = false;
3972 layout_type (record);
3974 warn_padded = owp;
3976 /* The correct type is an array type of one element. */
3977 return build_array_type (record, build_index_type (size_zero_node));
3980 /* Implement va_start by filling the va_list structure VALIST.
3981 NEXTARG points to the first anonymous stack argument.
3983 The following global variables are used to initialize
3984 the va_list structure:
3986 crtl->args.info;
3987 the CUMULATIVE_ARGS for this function
3989 crtl->args.arg_offset_rtx:
3990 holds the offset of the first anonymous stack argument
3991 (relative to the virtual arg pointer). */
3993 static void
3994 spu_va_start (tree valist, rtx nextarg)
3996 tree f_args, f_skip;
3997 tree args, skip, t;
3999 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4000 f_skip = DECL_CHAIN (f_args);
4002 valist = build_simple_mem_ref (valist);
4003 args =
4004 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4005 skip =
4006 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4008 /* Find the __args area. */
4009 t = make_tree (TREE_TYPE (args), nextarg);
4010 if (crtl->args.pretend_args_size > 0)
4011 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4012 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4013 TREE_SIDE_EFFECTS (t) = 1;
4014 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4016 /* Find the __skip area. */
4017 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4018 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4019 - STACK_POINTER_OFFSET));
4020 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4021 TREE_SIDE_EFFECTS (t) = 1;
4022 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4025 /* Gimplify va_arg by updating the va_list structure
4026 VALIST as required to retrieve an argument of type
4027 TYPE, and returning that argument.
4029 ret = va_arg(VALIST, TYPE);
4031 generates code equivalent to:
4033 paddedsize = (sizeof(TYPE) + 15) & -16;
4034 if (VALIST.__args + paddedsize > VALIST.__skip
4035 && VALIST.__args <= VALIST.__skip)
4036 addr = VALIST.__skip + 32;
4037 else
4038 addr = VALIST.__args;
4039 VALIST.__args = addr + paddedsize;
4040 ret = *(TYPE *)addr;
4042 static tree
4043 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4044 gimple_seq * post_p ATTRIBUTE_UNUSED)
4046 tree f_args, f_skip;
4047 tree args, skip;
4048 HOST_WIDE_INT size, rsize;
4049 tree addr, tmp;
4050 bool pass_by_reference_p;
4052 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4053 f_skip = DECL_CHAIN (f_args);
4055 valist = build_simple_mem_ref (valist);
4056 args =
4057 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4058 skip =
4059 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4061 addr = create_tmp_var (ptr_type_node, "va_arg");
4063 /* If an object is dynamically sized, a pointer to it is passed
4064 instead of the object itself. */
4065 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4066 false);
4067 if (pass_by_reference_p)
4068 type = build_pointer_type (type);
4069 size = int_size_in_bytes (type);
4070 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4072 /* Build the conditional expression to calculate addr. The expression
4073 will be gimplified later. */
4074 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4075 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4076 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4077 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4078 unshare_expr (skip)));
4080 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4081 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4082 unshare_expr (args));
4084 gimplify_assign (addr, tmp, pre_p);
4086 /* update VALIST.__args */
4087 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4088 gimplify_assign (unshare_expr (args), tmp, pre_p);
4090 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4091 addr);
4093 if (pass_by_reference_p)
4094 addr = build_va_arg_indirect_ref (addr);
4096 return build_va_arg_indirect_ref (addr);
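/* A worked example of the expansion above, assuming UNITS_PER_WORD is
   16 as the "+ 15 & -16" pseudo code suggests: for a 20-byte struct,
   rsize is rounded up to 32.  If advancing __args by 32 would step
   over __skip (and __args has not already passed it), the argument is
   instead read from __skip + 32, i.e. 32 bytes past the previous
   stack frame recorded by va_start; in both cases __args is then set
   to addr + 32.  */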
4099 /* Save parameter registers starting with the register that corresponds
4100 to the first unnamed parameter. If the first unnamed parameter is
4101 on the stack then save no registers. Set pretend_args_size to the
4102 amount of space needed to save the registers. */
4103 static void
4104 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4105 tree type, int *pretend_size, int no_rtl)
4107 if (!no_rtl)
4109 rtx tmp;
4110 int regno;
4111 int offset;
4112 int ncum = *get_cumulative_args (cum);
4114 /* cum currently points to the last named argument; we want to
4115 start at the next argument. */
4116 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4118 offset = -STACK_POINTER_OFFSET;
4119 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4121 tmp = gen_frame_mem (V4SImode,
4122 plus_constant (Pmode, virtual_incoming_args_rtx,
4123 offset));
4124 emit_move_insn (tmp,
4125 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4126 offset += 16;
4128 *pretend_size = offset + STACK_POINTER_OFFSET;
4132 static void
4133 spu_conditional_register_usage (void)
4135 if (flag_pic)
4137 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4138 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4142 /* This is called any time we inspect the alignment of a register used
4143 for addressing. */
4144 static int
4145 reg_aligned_for_addr (rtx x)
4147 int regno =
4148 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4149 return REGNO_POINTER_ALIGN (regno) >= 128;
4152 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4153 into its SYMBOL_REF_FLAGS. */
4154 static void
4155 spu_encode_section_info (tree decl, rtx rtl, int first)
4157 default_encode_section_info (decl, rtl, first);
4159 /* If a variable has a forced alignment to < 16 bytes, mark it with
4160 SYMBOL_FLAG_ALIGN1. */
4161 if (TREE_CODE (decl) == VAR_DECL
4162 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4163 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4166 /* Return TRUE if we are certain the mem refers to a complete object
4167 which is both 16-byte aligned and padded to a 16-byte boundary. This
4168 would make it safe to store with a single instruction.
4169 We guarantee the alignment and padding for static objects by aligning
4170 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4171 FIXME: We currently cannot guarantee this for objects on the stack
4172 because assign_parm_setup_stack calls assign_stack_local with the
4173 alignment of the parameter mode and in that case the alignment never
4174 gets adjusted by LOCAL_ALIGNMENT. */
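/* For example (illustrative): a 16-byte aligned static "int" satisfies
   this, because the 12 bytes of padding that follow it belong to the
   same object and may safely be clobbered by a full quadword store.
   An arbitrary stack slot has no such guarantee and therefore needs
   the read-modify-write sequence emitted by spu_split_store.  */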
4175 static int
4176 store_with_one_insn_p (rtx mem)
4178 machine_mode mode = GET_MODE (mem);
4179 rtx addr = XEXP (mem, 0);
4180 if (mode == BLKmode)
4181 return 0;
4182 if (GET_MODE_SIZE (mode) >= 16)
4183 return 1;
4184 /* Only static objects. */
4185 if (GET_CODE (addr) == SYMBOL_REF)
4187 /* We use the associated declaration to make sure the access is
4188 referring to the whole object.
4189 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4190 if it is necessary. Will there be cases where one exists, and
4191 the other does not? Will there be cases where both exist, but
4192 have different types? */
4193 tree decl = MEM_EXPR (mem);
4194 if (decl
4195 && TREE_CODE (decl) == VAR_DECL
4196 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4197 return 1;
4198 decl = SYMBOL_REF_DECL (addr);
4199 if (decl
4200 && TREE_CODE (decl) == VAR_DECL
4201 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4202 return 1;
4204 return 0;
4207 /* Return 1 when the address is not valid for a simple load and store as
4208 required by the '_mov*' patterns. We could make this less strict
4209 for loads, but we prefer MEMs to look the same so they are more
4210 likely to be merged. */
4211 static int
4212 address_needs_split (rtx mem)
4214 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4215 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4216 || !(store_with_one_insn_p (mem)
4217 || mem_is_padded_component_ref (mem))))
4218 return 1;
4220 return 0;
4223 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4224 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4225 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4227 /* MEM is known to be an __ea qualified memory access. Emit a call to
4228 fetch the PPU memory to local store, and return its address in local
4229 store. */
4231 static void
4232 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4234 if (is_store)
4236 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4237 if (!cache_fetch_dirty)
4238 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4239 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4240 2, ea_addr, EAmode, ndirty, SImode);
4242 else
4244 if (!cache_fetch)
4245 cache_fetch = init_one_libfunc ("__cache_fetch");
4246 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4247 1, ea_addr, EAmode);
4251 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4252 dirty bit marking, inline.
4254 The cache control data structure is an array of
4256 struct __cache_tag_array
4257 {
4258 unsigned int tag_lo[4];
4259 unsigned int tag_hi[4];
4260 void *data_pointer[4];
4261 int reserved[4];
4262 vector unsigned short dirty_bits[4];
4263 } */
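/* Roughly, the RTL emitted below computes the equivalent of this C
   sketch (a readability aid, not a separate implementation; it mirrors
   the fields declared above and assumes __cache_tag_array_size is the
   byte size of the array):

     struct __cache_tag_array *set
       = (void *) ((char *) &__cache_tag_array
                   + (ea_addr & (__cache_tag_array_size - 128)));
     unsigned int tag = ea_addr & -128;
     for (int way = 0; way < 4; way++)
       if (set->tag_lo[way] == tag)          // plus tag_hi for 64-bit __ea
         return set->data_pointer[way] + (ea_addr & 127);
     // otherwise fall back to the __cache_fetch libcall (a miss)
*/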
4265 static void
4266 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4268 rtx ea_addr_si;
4269 HOST_WIDE_INT v;
4270 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4271 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4272 rtx index_mask = gen_reg_rtx (SImode);
4273 rtx tag_arr = gen_reg_rtx (Pmode);
4274 rtx splat_mask = gen_reg_rtx (TImode);
4275 rtx splat = gen_reg_rtx (V4SImode);
4276 rtx splat_hi = NULL_RTX;
4277 rtx tag_index = gen_reg_rtx (Pmode);
4278 rtx block_off = gen_reg_rtx (SImode);
4279 rtx tag_addr = gen_reg_rtx (Pmode);
4280 rtx tag = gen_reg_rtx (V4SImode);
4281 rtx cache_tag = gen_reg_rtx (V4SImode);
4282 rtx cache_tag_hi = NULL_RTX;
4283 rtx cache_ptrs = gen_reg_rtx (TImode);
4284 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4285 rtx tag_equal = gen_reg_rtx (V4SImode);
4286 rtx tag_equal_hi = NULL_RTX;
4287 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4288 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4289 rtx eq_index = gen_reg_rtx (SImode);
4290 rtx bcomp, hit_label, hit_ref, cont_label;
4291 rtx_insn *insn;
4293 if (spu_ea_model != 32)
4295 splat_hi = gen_reg_rtx (V4SImode);
4296 cache_tag_hi = gen_reg_rtx (V4SImode);
4297 tag_equal_hi = gen_reg_rtx (V4SImode);
4300 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4301 emit_move_insn (tag_arr, tag_arr_sym);
4302 v = 0x0001020300010203LL;
4303 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4304 ea_addr_si = ea_addr;
4305 if (spu_ea_model != 32)
4306 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4308 /* tag_index = ea_addr & (tag_array_size - 128) */
4309 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4311 /* splat ea_addr to all 4 slots. */
4312 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4313 /* Similarly for high 32 bits of ea_addr. */
4314 if (spu_ea_model != 32)
4315 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4317 /* block_off = ea_addr & 127 */
4318 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4320 /* tag_addr = tag_arr + tag_index */
4321 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4323 /* Read cache tags. */
4324 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4325 if (spu_ea_model != 32)
4326 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4327 plus_constant (Pmode,
4328 tag_addr, 16)));
4330 /* tag = ea_addr & -128 */
4331 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4333 /* Read all four cache data pointers. */
4334 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4335 plus_constant (Pmode,
4336 tag_addr, 32)));
4338 /* Compare tags. */
4339 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4340 if (spu_ea_model != 32)
4342 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4343 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4346 /* At most one of the tags compare equal, so tag_equal has one
4347 32-bit slot set to all 1's, with the other slots all zero.
4348 gbb picks off low bit from each byte in the 128-bit registers,
4349 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4350 we have a hit. */
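/* Worked example (illustrative): a hit in slot 1 leaves tag_equal as
   { 0, -1, 0, 0 }, gbb then produces 0x0f00, clz gives eq_index = 20,
   and rotating cache_ptrs by 20 mod 16 = 4 bytes moves data_pointer[1]
   into the preferred slot.  */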
4351 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4352 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4354 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4355 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4357 /* This allows us to rotate the corresponding cache data pointer into
4358 slot 0 (rotqby rotates by eq_index mod 16 bytes). */
4359 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4360 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4362 /* Add block offset to form final data address. */
4363 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4365 /* Check that we did hit. */
4366 hit_label = gen_label_rtx ();
4367 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4368 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4369 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4370 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4371 hit_ref, pc_rtx)));
4372 /* Say that this branch is very likely to happen. */
4373 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4374 add_int_reg_note (insn, REG_BR_PROB, v);
4376 ea_load_store (mem, is_store, ea_addr, data_addr);
4377 cont_label = gen_label_rtx ();
4378 emit_jump_insn (gen_jump (cont_label));
4379 emit_barrier ();
4381 emit_label (hit_label);
4383 if (is_store)
4385 HOST_WIDE_INT v_hi;
4386 rtx dirty_bits = gen_reg_rtx (TImode);
4387 rtx dirty_off = gen_reg_rtx (SImode);
4388 rtx dirty_128 = gen_reg_rtx (TImode);
4389 rtx neg_block_off = gen_reg_rtx (SImode);
4391 /* Set up mask with one dirty bit per byte of the mem we are
4392 writing, starting from top bit. */
4393 v_hi = v = -1;
4394 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4395 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4397 v_hi = v;
4398 v = 0;
4400 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4402 /* Form index into cache dirty_bits. eq_index is one of
4403 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4404 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4405 offset to each of the four dirty_bits elements. */
4406 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4408 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4410 /* Rotate bit mask to proper bit. */
4411 emit_insn (gen_negsi2 (neg_block_off, block_off));
4412 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4413 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4415 /* Or in the new dirty bits. */
4416 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4418 /* Store. */
4419 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4422 emit_label (cont_label);
4425 static rtx
4426 expand_ea_mem (rtx mem, bool is_store)
4428 rtx ea_addr;
4429 rtx data_addr = gen_reg_rtx (Pmode);
4430 rtx new_mem;
4432 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4433 if (optimize_size || optimize == 0)
4434 ea_load_store (mem, is_store, ea_addr, data_addr);
4435 else
4436 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4438 if (ea_alias_set == -1)
4439 ea_alias_set = new_alias_set ();
4441 /* We generate a new MEM RTX to refer to the copy of the data
4442 in the cache. We do not copy memory attributes (except the
4443 alignment) from the original MEM, as they may no longer apply
4444 to the cache copy. */
4445 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4446 set_mem_alias_set (new_mem, ea_alias_set);
4447 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4449 return new_mem;
4453 spu_expand_mov (rtx * ops, machine_mode mode)
4455 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4457 /* Perform the move in the destination SUBREG's inner mode. */
4458 ops[0] = SUBREG_REG (ops[0]);
4459 mode = GET_MODE (ops[0]);
4460 ops[1] = gen_lowpart_common (mode, ops[1]);
4461 gcc_assert (ops[1]);
4464 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4466 rtx from = SUBREG_REG (ops[1]);
4467 machine_mode imode = int_mode_for_mode (GET_MODE (from));
4469 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4470 && GET_MODE_CLASS (imode) == MODE_INT
4471 && subreg_lowpart_p (ops[1]));
4473 if (GET_MODE_SIZE (imode) < 4)
4474 imode = SImode;
4475 if (imode != GET_MODE (from))
4476 from = gen_rtx_SUBREG (imode, from, 0);
4478 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4480 enum insn_code icode = convert_optab_handler (trunc_optab,
4481 mode, imode);
4482 emit_insn (GEN_FCN (icode) (ops[0], from));
4484 else
4485 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4486 return 1;
4489 /* At least one of the operands needs to be a register. */
4490 if ((reload_in_progress | reload_completed) == 0
4491 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4493 rtx temp = force_reg (mode, ops[1]);
4494 emit_move_insn (ops[0], temp);
4495 return 1;
4497 if (reload_in_progress || reload_completed)
4499 if (CONSTANT_P (ops[1]))
4500 return spu_split_immediate (ops);
4501 return 0;
4504 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4505 extend them. */
4506 if (GET_CODE (ops[1]) == CONST_INT)
4508 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4509 if (val != INTVAL (ops[1]))
4511 emit_move_insn (ops[0], GEN_INT (val));
4512 return 1;
4515 if (MEM_P (ops[0]))
4517 if (MEM_ADDR_SPACE (ops[0]))
4518 ops[0] = expand_ea_mem (ops[0], true);
4519 return spu_split_store (ops);
4521 if (MEM_P (ops[1]))
4523 if (MEM_ADDR_SPACE (ops[1]))
4524 ops[1] = expand_ea_mem (ops[1], false);
4525 return spu_split_load (ops);
4528 return 0;
4531 static void
4532 spu_convert_move (rtx dst, rtx src)
4534 machine_mode mode = GET_MODE (dst);
4535 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4536 rtx reg;
4537 gcc_assert (GET_MODE (src) == TImode);
4538 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4539 emit_insn (gen_rtx_SET (VOIDmode, reg,
4540 gen_rtx_TRUNCATE (int_mode,
4541 gen_rtx_LSHIFTRT (TImode, src,
4542 GEN_INT (int_mode == DImode ? 64 : 96)))));
4543 if (int_mode != mode)
4545 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4546 emit_move_insn (dst, reg);
4550 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4551 the address from SRC and SRC+16. Return a REG or CONST_INT that
4552 specifies how many bytes to rotate the loaded registers, plus any
4553 extra from EXTRA_ROTQBY. The address and rotate amounts are
4554 normalized to improve merging of loads and rotate computations. */
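/* An illustrative sketch (not the exact emitted RTL): a 4-byte load
   from an unaligned register address P typically expands to

     lqd    rt, 0(P)        load the enclosing quadword
     rotqby rt, rt, P       rotate the addressed bytes to the preferred slot

   with any known constant misalignment folded into the rotate amount
   together with EXTRA_ROTQBY.  */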
4555 static rtx
4556 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4558 rtx addr = XEXP (src, 0);
4559 rtx p0, p1, rot, addr0, addr1;
4560 int rot_amt;
4562 rot = 0;
4563 rot_amt = 0;
4565 if (MEM_ALIGN (src) >= 128)
4566 /* Address is already aligned; simply perform a TImode load. */ ;
4567 else if (GET_CODE (addr) == PLUS)
4569 /* 8 cases:
4570 aligned reg + aligned reg => lqx
4571 aligned reg + unaligned reg => lqx, rotqby
4572 aligned reg + aligned const => lqd
4573 aligned reg + unaligned const => lqd, rotqbyi
4574 unaligned reg + aligned reg => lqx, rotqby
4575 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4576 unaligned reg + aligned const => lqd, rotqby
4577 unaligned reg + unaligned const -> not allowed by legitimate address
4578 */
4579 p0 = XEXP (addr, 0);
4580 p1 = XEXP (addr, 1);
4581 if (!reg_aligned_for_addr (p0))
4583 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4585 rot = gen_reg_rtx (SImode);
4586 emit_insn (gen_addsi3 (rot, p0, p1));
4588 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4590 if (INTVAL (p1) > 0
4591 && REG_POINTER (p0)
4592 && INTVAL (p1) * BITS_PER_UNIT
4593 < REGNO_POINTER_ALIGN (REGNO (p0)))
4595 rot = gen_reg_rtx (SImode);
4596 emit_insn (gen_addsi3 (rot, p0, p1));
4597 addr = p0;
4599 else
4601 rtx x = gen_reg_rtx (SImode);
4602 emit_move_insn (x, p1);
4603 if (!spu_arith_operand (p1, SImode))
4604 p1 = x;
4605 rot = gen_reg_rtx (SImode);
4606 emit_insn (gen_addsi3 (rot, p0, p1));
4607 addr = gen_rtx_PLUS (Pmode, p0, x);
4610 else
4611 rot = p0;
4613 else
4615 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4617 rot_amt = INTVAL (p1) & 15;
4618 if (INTVAL (p1) & -16)
4620 p1 = GEN_INT (INTVAL (p1) & -16);
4621 addr = gen_rtx_PLUS (SImode, p0, p1);
4623 else
4624 addr = p0;
4626 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4627 rot = p1;
4630 else if (REG_P (addr))
4632 if (!reg_aligned_for_addr (addr))
4633 rot = addr;
4635 else if (GET_CODE (addr) == CONST)
4637 if (GET_CODE (XEXP (addr, 0)) == PLUS
4638 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4639 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4641 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4642 if (rot_amt & -16)
4643 addr = gen_rtx_CONST (Pmode,
4644 gen_rtx_PLUS (Pmode,
4645 XEXP (XEXP (addr, 0), 0),
4646 GEN_INT (rot_amt & -16)));
4647 else
4648 addr = XEXP (XEXP (addr, 0), 0);
4650 else
4652 rot = gen_reg_rtx (Pmode);
4653 emit_move_insn (rot, addr);
4656 else if (GET_CODE (addr) == CONST_INT)
4658 rot_amt = INTVAL (addr);
4659 addr = GEN_INT (rot_amt & -16);
4661 else if (!ALIGNED_SYMBOL_REF_P (addr))
4663 rot = gen_reg_rtx (Pmode);
4664 emit_move_insn (rot, addr);
4667 rot_amt += extra_rotby;
4669 rot_amt &= 15;
4671 if (rot && rot_amt)
4673 rtx x = gen_reg_rtx (SImode);
4674 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4675 rot = x;
4676 rot_amt = 0;
4678 if (!rot && rot_amt)
4679 rot = GEN_INT (rot_amt);
4681 addr0 = copy_rtx (addr);
4682 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4683 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4685 if (dst1)
4687 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4688 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4689 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4692 return rot;
4696 spu_split_load (rtx * ops)
4698 machine_mode mode = GET_MODE (ops[0]);
4699 rtx addr, load, rot;
4700 int rot_amt;
4702 if (GET_MODE_SIZE (mode) >= 16)
4703 return 0;
4705 addr = XEXP (ops[1], 0);
4706 gcc_assert (GET_CODE (addr) != AND);
4708 if (!address_needs_split (ops[1]))
4710 ops[1] = change_address (ops[1], TImode, addr);
4711 load = gen_reg_rtx (TImode);
4712 emit_insn (gen__movti (load, ops[1]));
4713 spu_convert_move (ops[0], load);
4714 return 1;
4717 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4719 load = gen_reg_rtx (TImode);
4720 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4722 if (rot)
4723 emit_insn (gen_rotqby_ti (load, load, rot));
4725 spu_convert_move (ops[0], load);
4726 return 1;
4730 spu_split_store (rtx * ops)
4732 machine_mode mode = GET_MODE (ops[0]);
4733 rtx reg;
4734 rtx addr, p0, p1, p1_lo, smem;
4735 int aform;
4736 int scalar;
4738 if (GET_MODE_SIZE (mode) >= 16)
4739 return 0;
4741 addr = XEXP (ops[0], 0);
4742 gcc_assert (GET_CODE (addr) != AND);
4744 if (!address_needs_split (ops[0]))
4746 reg = gen_reg_rtx (TImode);
4747 emit_insn (gen_spu_convert (reg, ops[1]));
4748 ops[0] = change_address (ops[0], TImode, addr);
4749 emit_move_insn (ops[0], reg);
4750 return 1;
4753 if (GET_CODE (addr) == PLUS)
4755 /* 8 cases:
4756 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4757 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4758 aligned reg + aligned const => lqd, c?d, shuf, stqx
4759 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4760 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4761 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4762 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4763 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4764 */
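/* An illustrative sketch of the common sub-quadword case (not the
   exact emitted sequence): storing a 4-byte value VAL to address P is
   a read-modify-write of the enclosing quadword,

     lqd   old, 0(P)            load the existing 16 bytes
     cwd   pat, 0(P)            build the insertion mask for the word at P
     shufb new, val, old, pat   merge VAL into the quadword
     stqd  new, 0(P)            store the whole quadword back
*/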
4765 aform = 0;
4766 p0 = XEXP (addr, 0);
4767 p1 = p1_lo = XEXP (addr, 1);
4768 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4770 p1_lo = GEN_INT (INTVAL (p1) & 15);
4771 if (reg_aligned_for_addr (p0))
4773 p1 = GEN_INT (INTVAL (p1) & -16);
4774 if (p1 == const0_rtx)
4775 addr = p0;
4776 else
4777 addr = gen_rtx_PLUS (SImode, p0, p1);
4779 else
4781 rtx x = gen_reg_rtx (SImode);
4782 emit_move_insn (x, p1);
4783 addr = gen_rtx_PLUS (SImode, p0, x);
4787 else if (REG_P (addr))
4789 aform = 0;
4790 p0 = addr;
4791 p1 = p1_lo = const0_rtx;
4793 else
4795 aform = 1;
4796 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4797 p1 = 0; /* aform doesn't use p1 */
4798 p1_lo = addr;
4799 if (ALIGNED_SYMBOL_REF_P (addr))
4800 p1_lo = const0_rtx;
4801 else if (GET_CODE (addr) == CONST
4802 && GET_CODE (XEXP (addr, 0)) == PLUS
4803 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4804 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4806 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4807 if ((v & -16) != 0)
4808 addr = gen_rtx_CONST (Pmode,
4809 gen_rtx_PLUS (Pmode,
4810 XEXP (XEXP (addr, 0), 0),
4811 GEN_INT (v & -16)));
4812 else
4813 addr = XEXP (XEXP (addr, 0), 0);
4814 p1_lo = GEN_INT (v & 15);
4816 else if (GET_CODE (addr) == CONST_INT)
4818 p1_lo = GEN_INT (INTVAL (addr) & 15);
4819 addr = GEN_INT (INTVAL (addr) & -16);
4821 else
4823 p1_lo = gen_reg_rtx (SImode);
4824 emit_move_insn (p1_lo, addr);
4828 gcc_assert (aform == 0 || aform == 1);
4829 reg = gen_reg_rtx (TImode);
4831 scalar = store_with_one_insn_p (ops[0]);
4832 if (!scalar)
4834 /* We could copy the flags from the ops[0] MEM to mem here, but
4835 we don't because we want this load to be optimized away if
4836 possible, and copying the flags would prevent that in certain
4837 cases, e.g. consider the volatile flag. */
4839 rtx pat = gen_reg_rtx (TImode);
4840 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4841 set_mem_alias_set (lmem, 0);
4842 emit_insn (gen_movti (reg, lmem));
4844 if (!p0 || reg_aligned_for_addr (p0))
4845 p0 = stack_pointer_rtx;
4846 if (!p1_lo)
4847 p1_lo = const0_rtx;
4849 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4850 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4852 else
4854 if (GET_CODE (ops[1]) == REG)
4855 emit_insn (gen_spu_convert (reg, ops[1]));
4856 else if (GET_CODE (ops[1]) == SUBREG)
4857 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4858 else
4859 abort ();
4862 if (GET_MODE_SIZE (mode) < 4 && scalar)
4863 emit_insn (gen_ashlti3
4864 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4866 smem = change_address (ops[0], TImode, copy_rtx (addr));
4867 /* We can't use the previous alias set because the memory has changed
4868 size and can potentially overlap objects of other types. */
4869 set_mem_alias_set (smem, 0);
4871 emit_insn (gen_movti (smem, reg));
4872 return 1;
4875 /* Return TRUE if X is MEM which is a struct member reference
4876 and the member can safely be loaded and stored with a single
4877 instruction because it is padded. */
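/* Illustrative example (an assumption about typical usage, not taken
   from the surrounding code): given

     struct s { int a __attribute__ ((aligned (16)));
                int b __attribute__ ((aligned (16))); };

   a store to s.a can use a single quadword store, because the next
   field is also 16-byte aligned, so the bytes between a and b are
   padding that belongs to a's quadword.  */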
4878 static int
4879 mem_is_padded_component_ref (rtx x)
4881 tree t = MEM_EXPR (x);
4882 tree r;
4883 if (!t || TREE_CODE (t) != COMPONENT_REF)
4884 return 0;
4885 t = TREE_OPERAND (t, 1);
4886 if (!t || TREE_CODE (t) != FIELD_DECL
4887 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4888 return 0;
4889 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4890 r = DECL_FIELD_CONTEXT (t);
4891 if (!r || TREE_CODE (r) != RECORD_TYPE)
4892 return 0;
4893 /* Make sure they are the same mode */
4894 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4895 return 0;
4896 /* If there are no following fields, then the field's alignment ensures
4897 the structure is padded to that alignment, which means this field is
4898 padded too. */
4899 if (TREE_CHAIN (t) == 0)
4900 return 1;
4901 /* If the following field is also aligned then this field will be
4902 padded. */
4903 t = TREE_CHAIN (t);
4904 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4905 return 1;
4906 return 0;
4909 /* Parse the -mfixed-range= option string. */
4910 static void
4911 fix_range (const char *const_str)
4913 int i, first, last;
4914 char *str, *dash, *comma;
4916 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4917 REG2 are either register names or register numbers. The effect
4918 of this option is to mark the registers in the range from REG1 to
4919 REG2 as ``fixed'' so they won't be used by the compiler. */
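/* For example (illustrative values only): -mfixed-range=80-90 marks
   registers 80 through 90 as fixed, and several ranges may be given
   separated by commas, e.g. -mfixed-range=80-90,100-110.  */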
4921 i = strlen (const_str);
4922 str = (char *) alloca (i + 1);
4923 memcpy (str, const_str, i + 1);
4925 while (1)
4927 dash = strchr (str, '-');
4928 if (!dash)
4930 warning (0, "value of -mfixed-range must have form REG1-REG2");
4931 return;
4933 *dash = '\0';
4934 comma = strchr (dash + 1, ',');
4935 if (comma)
4936 *comma = '\0';
4938 first = decode_reg_name (str);
4939 if (first < 0)
4941 warning (0, "unknown register name: %s", str);
4942 return;
4945 last = decode_reg_name (dash + 1);
4946 if (last < 0)
4948 warning (0, "unknown register name: %s", dash + 1);
4949 return;
4952 *dash = '-';
4954 if (first > last)
4956 warning (0, "%s-%s is an empty range", str, dash + 1);
4957 return;
4960 for (i = first; i <= last; ++i)
4961 fixed_regs[i] = call_used_regs[i] = 1;
4963 if (!comma)
4964 break;
4966 *comma = ',';
4967 str = comma + 1;
4971 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4972 can be generated using the fsmbi instruction. */
4974 fsmbi_const_p (rtx x)
4976 if (CONSTANT_P (x))
4978 /* We can always choose TImode for CONST_INT because the high bits
4979 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4980 enum immediate_class c = classify_immediate (x, TImode);
4981 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4983 return 0;
4986 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4987 can be generated using the cbd, chd, cwd or cdd instruction. */
4989 cpat_const_p (rtx x, machine_mode mode)
4991 if (CONSTANT_P (x))
4993 enum immediate_class c = classify_immediate (x, mode);
4994 return c == IC_CPAT;
4996 return 0;
5000 gen_cpat_const (rtx * ops)
5002 unsigned char dst[16];
5003 int i, offset, shift, isize;
5004 if (GET_CODE (ops[3]) != CONST_INT
5005 || GET_CODE (ops[2]) != CONST_INT
5006 || (GET_CODE (ops[1]) != CONST_INT
5007 && GET_CODE (ops[1]) != REG))
5008 return 0;
5009 if (GET_CODE (ops[1]) == REG
5010 && (!REG_POINTER (ops[1])
5011 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5012 return 0;
5014 for (i = 0; i < 16; i++)
5015 dst[i] = i + 16;
5016 isize = INTVAL (ops[3]);
5017 if (isize == 1)
5018 shift = 3;
5019 else if (isize == 2)
5020 shift = 2;
5021 else
5022 shift = 0;
5023 offset = (INTVAL (ops[2]) +
5024 (GET_CODE (ops[1]) ==
5025 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5026 for (i = 0; i < isize; i++)
5027 dst[offset + i] = i + shift;
5028 return array_to_constant (TImode, dst);
5031 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16-byte
5032 array. Use MODE for CONST_INTs. When the constant's mode is smaller
5033 than 16 bytes, the value is repeated across the rest of the array. */
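/* Worked example (illustrative): for the SImode constant 0x01020304
   the first loop below produces arr[0..3] = { 0x01, 0x02, 0x03, 0x04 }
   and the splat step then repeats that 4-byte pattern through
   arr[15].  */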
5034 void
5035 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5037 HOST_WIDE_INT val;
5038 int i, j, first;
5040 memset (arr, 0, 16);
5041 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5042 if (GET_CODE (x) == CONST_INT
5043 || (GET_CODE (x) == CONST_DOUBLE
5044 && (mode == SFmode || mode == DFmode)))
5046 gcc_assert (mode != VOIDmode && mode != BLKmode);
5048 if (GET_CODE (x) == CONST_DOUBLE)
5049 val = const_double_to_hwint (x);
5050 else
5051 val = INTVAL (x);
5052 first = GET_MODE_SIZE (mode) - 1;
5053 for (i = first; i >= 0; i--)
5055 arr[i] = val & 0xff;
5056 val >>= 8;
5058 /* Splat the constant across the whole array. */
5059 for (j = 0, i = first + 1; i < 16; i++)
5061 arr[i] = arr[j];
5062 j = (j == first) ? 0 : j + 1;
5065 else if (GET_CODE (x) == CONST_DOUBLE)
5067 val = CONST_DOUBLE_LOW (x);
5068 for (i = 15; i >= 8; i--)
5070 arr[i] = val & 0xff;
5071 val >>= 8;
5073 val = CONST_DOUBLE_HIGH (x);
5074 for (i = 7; i >= 0; i--)
5076 arr[i] = val & 0xff;
5077 val >>= 8;
5080 else if (GET_CODE (x) == CONST_VECTOR)
5082 int units;
5083 rtx elt;
5084 mode = GET_MODE_INNER (mode);
5085 units = CONST_VECTOR_NUNITS (x);
5086 for (i = 0; i < units; i++)
5088 elt = CONST_VECTOR_ELT (x, i);
5089 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5091 if (GET_CODE (elt) == CONST_DOUBLE)
5092 val = const_double_to_hwint (elt);
5093 else
5094 val = INTVAL (elt);
5095 first = GET_MODE_SIZE (mode) - 1;
5096 if (first + i * GET_MODE_SIZE (mode) > 16)
5097 abort ();
5098 for (j = first; j >= 0; j--)
5100 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5101 val >>= 8;
5106 else
5107 gcc_unreachable();
5110 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5111 smaller than 16 bytes, use the bytes that would represent that value
5112 in a register, e.g., for QImode return the value of arr[3]. */
5114 array_to_constant (machine_mode mode, const unsigned char arr[16])
5116 machine_mode inner_mode;
5117 rtvec v;
5118 int units, size, i, j, k;
5119 HOST_WIDE_INT val;
5121 if (GET_MODE_CLASS (mode) == MODE_INT
5122 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5124 j = GET_MODE_SIZE (mode);
5125 i = j < 4 ? 4 - j : 0;
5126 for (val = 0; i < j; i++)
5127 val = (val << 8) | arr[i];
5128 val = trunc_int_for_mode (val, mode);
5129 return GEN_INT (val);
5132 if (mode == TImode)
5134 HOST_WIDE_INT high;
5135 for (i = high = 0; i < 8; i++)
5136 high = (high << 8) | arr[i];
5137 for (i = 8, val = 0; i < 16; i++)
5138 val = (val << 8) | arr[i];
5139 return immed_double_const (val, high, TImode);
5141 if (mode == SFmode)
5143 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5144 val = trunc_int_for_mode (val, SImode);
5145 return hwint_to_const_double (SFmode, val);
5147 if (mode == DFmode)
5149 for (i = 0, val = 0; i < 8; i++)
5150 val = (val << 8) | arr[i];
5151 return hwint_to_const_double (DFmode, val);
5154 if (!VECTOR_MODE_P (mode))
5155 abort ();
5157 units = GET_MODE_NUNITS (mode);
5158 size = GET_MODE_UNIT_SIZE (mode);
5159 inner_mode = GET_MODE_INNER (mode);
5160 v = rtvec_alloc (units);
5162 for (k = i = 0; i < units; ++i)
5164 val = 0;
5165 for (j = 0; j < size; j++, k++)
5166 val = (val << 8) | arr[k];
5168 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5169 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5170 else
5171 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5173 if (k > 16)
5174 abort ();
5176 return gen_rtx_CONST_VECTOR (mode, v);
5179 static void
5180 reloc_diagnostic (rtx x)
5182 tree decl = 0;
5183 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5184 return;
5186 if (GET_CODE (x) == SYMBOL_REF)
5187 decl = SYMBOL_REF_DECL (x);
5188 else if (GET_CODE (x) == CONST
5189 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5190 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5192 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5193 if (decl && !DECL_P (decl))
5194 decl = 0;
5196 /* The decl could be a string constant. */
5197 if (decl && DECL_P (decl))
5199 location_t loc;
5200 /* We use last_assemble_variable_decl to get line information. It's
5201 not always going to be right and might not even be close, but will
5202 be right for the more common cases. */
5203 if (!last_assemble_variable_decl || in_section == ctors_section)
5204 loc = DECL_SOURCE_LOCATION (decl);
5205 else
5206 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5208 if (TARGET_WARN_RELOC)
5209 warning_at (loc, 0,
5210 "creating run-time relocation for %qD", decl);
5211 else
5212 error_at (loc,
5213 "creating run-time relocation for %qD", decl);
5215 else
5217 if (TARGET_WARN_RELOC)
5218 warning_at (input_location, 0, "creating run-time relocation");
5219 else
5220 error_at (input_location, "creating run-time relocation");
5224 /* Hook into assemble_integer so we can generate an error for run-time
5225 relocations. The SPU ABI disallows them. */
5226 static bool
5227 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5229 /* By default run-time relocations aren't supported, but we allow them
5230 in case users support them in their own run-time loader, and we provide
5231 a warning for those users that don't. */
5232 if ((GET_CODE (x) == SYMBOL_REF)
5233 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5234 reloc_diagnostic (x);
5236 return default_assemble_integer (x, size, aligned_p);
5239 static void
5240 spu_asm_globalize_label (FILE * file, const char *name)
5242 fputs ("\t.global\t", file);
5243 assemble_name (file, name);
5244 fputs ("\n", file);
5247 static bool
5248 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5249 int opno ATTRIBUTE_UNUSED, int *total,
5250 bool speed ATTRIBUTE_UNUSED)
5252 machine_mode mode = GET_MODE (x);
5253 int cost = COSTS_N_INSNS (2);
5255 /* Folding to a CONST_VECTOR will use extra space but there might
5256 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5257 only if it allows us to fold away multiple insns. Changing the cost
5258 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5259 because this cost will only be compared against a single insn.
5260 if (code == CONST_VECTOR)
5261 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5262 */
5264 /* Use defaults for float operations. Not accurate but good enough. */
5265 if (mode == DFmode)
5267 *total = COSTS_N_INSNS (13);
5268 return true;
5270 if (mode == SFmode)
5272 *total = COSTS_N_INSNS (6);
5273 return true;
5275 switch (code)
5277 case CONST_INT:
5278 if (satisfies_constraint_K (x))
5279 *total = 0;
5280 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5281 *total = COSTS_N_INSNS (1);
5282 else
5283 *total = COSTS_N_INSNS (3);
5284 return true;
5286 case CONST:
5287 *total = COSTS_N_INSNS (3);
5288 return true;
5290 case LABEL_REF:
5291 case SYMBOL_REF:
5292 *total = COSTS_N_INSNS (0);
5293 return true;
5295 case CONST_DOUBLE:
5296 *total = COSTS_N_INSNS (5);
5297 return true;
5299 case FLOAT_EXTEND:
5300 case FLOAT_TRUNCATE:
5301 case FLOAT:
5302 case UNSIGNED_FLOAT:
5303 case FIX:
5304 case UNSIGNED_FIX:
5305 *total = COSTS_N_INSNS (7);
5306 return true;
5308 case PLUS:
5309 if (mode == TImode)
5311 *total = COSTS_N_INSNS (9);
5312 return true;
5314 break;
5316 case MULT:
5317 cost =
5318 GET_CODE (XEXP (x, 0)) ==
5319 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5320 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5322 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5324 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5325 cost = COSTS_N_INSNS (14);
5326 if ((val & 0xffff) == 0)
5327 cost = COSTS_N_INSNS (9);
5328 else if (val > 0 && val < 0x10000)
5329 cost = COSTS_N_INSNS (11);
5332 *total = cost;
5333 return true;
5334 case DIV:
5335 case UDIV:
5336 case MOD:
5337 case UMOD:
5338 *total = COSTS_N_INSNS (20);
5339 return true;
5340 case ROTATE:
5341 case ROTATERT:
5342 case ASHIFT:
5343 case ASHIFTRT:
5344 case LSHIFTRT:
5345 *total = COSTS_N_INSNS (4);
5346 return true;
5347 case UNSPEC:
5348 if (XINT (x, 1) == UNSPEC_CONVERT)
5349 *total = COSTS_N_INSNS (0);
5350 else
5351 *total = COSTS_N_INSNS (4);
5352 return true;
5354 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5355 if (GET_MODE_CLASS (mode) == MODE_INT
5356 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5357 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5358 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5359 *total = cost;
5360 return true;
5363 static machine_mode
5364 spu_unwind_word_mode (void)
5366 return SImode;
5369 /* Decide whether we can make a sibling call to a function. DECL is the
5370 declaration of the function being targeted by the call and EXP is the
5371 CALL_EXPR representing the call. */
5372 static bool
5373 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5375 return decl && !TARGET_LARGE_MEM;
5378 /* We need to correctly update the back chain pointer and the Available
5379 Stack Size (which is in the second slot of the sp register). */
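/* Roughly equivalent C, as a sketch (slot layout as described above:
   slot 0 holds the stack pointer, slot 1 the Available Stack Size):

     qword chain = *(qword *) $sp;   // save the current back chain
     $sp[0] -= size;                 // allocate the new block
     $sp[1] -= size;                 // shrink the available size
     *(qword *) $sp = chain;         // back chain at the new stack bottom
     op0 = address of the new space;

   With -fstack-check the code also halts if the available size would
   become negative.  */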
5380 void
5381 spu_allocate_stack (rtx op0, rtx op1)
5383 HOST_WIDE_INT v;
5384 rtx chain = gen_reg_rtx (V4SImode);
5385 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5386 rtx sp = gen_reg_rtx (V4SImode);
5387 rtx splatted = gen_reg_rtx (V4SImode);
5388 rtx pat = gen_reg_rtx (TImode);
5390 /* copy the back chain so we can save it back again. */
5391 emit_move_insn (chain, stack_bot);
5393 op1 = force_reg (SImode, op1);
5395 v = 0x1020300010203ll;
5396 emit_move_insn (pat, immed_double_const (v, v, TImode));
5397 emit_insn (gen_shufb (splatted, op1, op1, pat));
5399 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5400 emit_insn (gen_subv4si3 (sp, sp, splatted));
5402 if (flag_stack_check)
5404 rtx avail = gen_reg_rtx(SImode);
5405 rtx result = gen_reg_rtx(SImode);
5406 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5407 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5408 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5411 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5413 emit_move_insn (stack_bot, chain);
5415 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5418 void
5419 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5421 static unsigned char arr[16] =
5422 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5423 rtx temp = gen_reg_rtx (SImode);
5424 rtx temp2 = gen_reg_rtx (SImode);
5425 rtx temp3 = gen_reg_rtx (V4SImode);
5426 rtx temp4 = gen_reg_rtx (V4SImode);
5427 rtx pat = gen_reg_rtx (TImode);
5428 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5430 /* Restore the backchain from the first word, sp from the second. */
5431 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5432 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5434 emit_move_insn (pat, array_to_constant (TImode, arr));
5436 /* Compute Available Stack Size for sp */
5437 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5438 emit_insn (gen_shufb (temp3, temp, temp, pat));
5440 /* Compute Available Stack Size for back chain */
5441 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5442 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5443 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5445 emit_insn (gen_addv4si3 (sp, sp, temp3));
5446 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5449 static void
5450 spu_init_libfuncs (void)
5452 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5453 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5454 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5455 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5456 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5457 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5458 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5459 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5460 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5461 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5462 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5463 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5465 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5466 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5468 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5469 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5470 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5471 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5472 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5473 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5474 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5475 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5476 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5477 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5478 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5479 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5481 set_optab_libfunc (smul_optab, TImode, "__multi3");
5482 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5483 set_optab_libfunc (smod_optab, TImode, "__modti3");
5484 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5485 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5486 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5489 /* Make a subreg, stripping any existing subreg. We could possibly just
5490 call simplify_subreg, but in this case we know what we want. */
5492 spu_gen_subreg (machine_mode mode, rtx x)
5494 if (GET_CODE (x) == SUBREG)
5495 x = SUBREG_REG (x);
5496 if (GET_MODE (x) == mode)
5497 return x;
5498 return gen_rtx_SUBREG (mode, x, 0);
5501 static bool
5502 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5504 return (TYPE_MODE (type) == BLKmode
5505 && ((type) == 0
5506 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5507 || int_size_in_bytes (type) >
5508 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5511 /* Create the built-in types and functions */
5513 enum spu_function_code
5515 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5516 #include "spu-builtins.def"
5517 #undef DEF_BUILTIN
5518 NUM_SPU_BUILTINS
5521 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5523 struct spu_builtin_description spu_builtins[] = {
5524 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5525 {fcode, icode, name, type, params},
5526 #include "spu-builtins.def"
5527 #undef DEF_BUILTIN
5530 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5532 /* Returns the spu builtin decl for CODE. */
5534 static tree
5535 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5537 if (code >= NUM_SPU_BUILTINS)
5538 return error_mark_node;
5540 return spu_builtin_decls[code];
5544 static void
5545 spu_init_builtins (void)
5547 struct spu_builtin_description *d;
5548 unsigned int i;
5550 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5551 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5552 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5553 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5554 V4SF_type_node = build_vector_type (float_type_node, 4);
5555 V2DF_type_node = build_vector_type (double_type_node, 2);
5557 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5558 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5559 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5560 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5562 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5564 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5566 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5567 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5568 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5569 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5570 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5571 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5572 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5573 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5574 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5575 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5577 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5578 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5579 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5580 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5581 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5582 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5583 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5584 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5586 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5587 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5589 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5591 spu_builtin_types[SPU_BTI_PTR] =
5592 build_pointer_type (build_qualified_type
5593 (void_type_node,
5594 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5596 /* For each builtin we build a new prototype. The tree code will make
5597 sure nodes are shared. */
5598 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5600 tree p;
5601 char name[64]; /* build_function will make a copy. */
5602 int parm;
5604 if (d->name == 0)
5605 continue;
5607 /* Find last parm. */
5608 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5611 p = void_list_node;
5612 while (parm > 1)
5613 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5615 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5617 sprintf (name, "__builtin_%s", d->name);
5618 spu_builtin_decls[i] =
5619 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5620 if (d->fcode == SPU_MASK_FOR_LOAD)
5621 TREE_READONLY (spu_builtin_decls[i]) = 1;
5623 /* These builtins don't throw. */
5624 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5628 void
5629 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5631 static unsigned char arr[16] =
5632 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5634 rtx temp = gen_reg_rtx (Pmode);
5635 rtx temp2 = gen_reg_rtx (V4SImode);
5636 rtx temp3 = gen_reg_rtx (V4SImode);
5637 rtx pat = gen_reg_rtx (TImode);
5638 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5640 emit_move_insn (pat, array_to_constant (TImode, arr));
5642 /* Restore the sp. */
5643 emit_move_insn (temp, op1);
5644 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5646 /* Compute available stack size for sp. */
5647 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5648 emit_insn (gen_shufb (temp3, temp, temp, pat));
5650 emit_insn (gen_addv4si3 (sp, sp, temp3));
5651 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5655 spu_safe_dma (HOST_WIDE_INT channel)
5657 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5660 void
5661 spu_builtin_splats (rtx ops[])
5663 machine_mode mode = GET_MODE (ops[0]);
5664 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5666 unsigned char arr[16];
5667 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5668 emit_move_insn (ops[0], array_to_constant (mode, arr));
5670 else
5672 rtx reg = gen_reg_rtx (TImode);
5673 rtx shuf;
5674 if (GET_CODE (ops[1]) != REG
5675 && GET_CODE (ops[1]) != SUBREG)
5676 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5677 switch (mode)
5679 case V2DImode:
5680 case V2DFmode:
5681 shuf =
5682 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5683 TImode);
5684 break;
5685 case V4SImode:
5686 case V4SFmode:
5687 shuf =
5688 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5689 TImode);
5690 break;
5691 case V8HImode:
5692 shuf =
5693 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5694 TImode);
5695 break;
5696 case V16QImode:
5697 shuf =
5698 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5699 TImode);
5700 break;
5701 default:
5702 abort ();
5704 emit_move_insn (reg, shuf);
5705 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5709 void
5710 spu_builtin_extract (rtx ops[])
5712 machine_mode mode;
5713 rtx rot, from, tmp;
5715 mode = GET_MODE (ops[1]);
5717 if (GET_CODE (ops[2]) == CONST_INT)
5719 switch (mode)
5721 case V16QImode:
5722 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5723 break;
5724 case V8HImode:
5725 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5726 break;
5727 case V4SFmode:
5728 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5729 break;
5730 case V4SImode:
5731 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5732 break;
5733 case V2DImode:
5734 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5735 break;
5736 case V2DFmode:
5737 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5738 break;
5739 default:
5740 abort ();
5742 return;
5745 from = spu_gen_subreg (TImode, ops[1]);
5746 rot = gen_reg_rtx (TImode);
5747 tmp = gen_reg_rtx (SImode);
5749 switch (mode)
5751 case V16QImode:
5752 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5753 break;
5754 case V8HImode:
5755 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5756 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5757 break;
5758 case V4SFmode:
5759 case V4SImode:
5760 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5761 break;
5762 case V2DImode:
5763 case V2DFmode:
5764 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5765 break;
5766 default:
5767 abort ();
5769 emit_insn (gen_rotqby_ti (rot, from, tmp));
5771 emit_insn (gen_spu_convert (ops[0], rot));
5774 void
5775 spu_builtin_insert (rtx ops[])
5777 machine_mode mode = GET_MODE (ops[0]);
5778 machine_mode imode = GET_MODE_INNER (mode);
5779 rtx mask = gen_reg_rtx (TImode);
5780 rtx offset;
5782 if (GET_CODE (ops[3]) == CONST_INT)
5783 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5784 else
5786 offset = gen_reg_rtx (SImode);
5787 emit_insn (gen_mulsi3
5788 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5790 emit_insn (gen_cpat
5791 (mask, stack_pointer_rtx, offset,
5792 GEN_INT (GET_MODE_SIZE (imode))));
5793 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5796 void
5797 spu_builtin_promote (rtx ops[])
5799 machine_mode mode, imode;
5800 rtx rot, from, offset;
5801 HOST_WIDE_INT pos;
5803 mode = GET_MODE (ops[0]);
5804 imode = GET_MODE_INNER (mode);
5806 from = gen_reg_rtx (TImode);
5807 rot = spu_gen_subreg (TImode, ops[0]);
5809 emit_insn (gen_spu_convert (from, ops[1]));
5811 if (GET_CODE (ops[2]) == CONST_INT)
5813 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5814 if (GET_MODE_SIZE (imode) < 4)
5815 pos += 4 - GET_MODE_SIZE (imode);
5816 offset = GEN_INT (pos & 15);
5818 else
5820 offset = gen_reg_rtx (SImode);
5821 switch (mode)
5823 case V16QImode:
5824 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5825 break;
5826 case V8HImode:
5827 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5828 emit_insn (gen_addsi3 (offset, offset, offset));
5829 break;
5830 case V4SFmode:
5831 case V4SImode:
5832 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5833 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5834 break;
5835 case V2DImode:
5836 case V2DFmode:
5837 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5838 break;
5839 default:
5840 abort ();
5843 emit_insn (gen_rotqby_ti (rot, from, offset));
5846 static void
5847 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5849 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5850 rtx shuf = gen_reg_rtx (V4SImode);
5851 rtx insn = gen_reg_rtx (V4SImode);
5852 rtx shufc;
5853 rtx insnc;
5854 rtx mem;
5856 fnaddr = force_reg (SImode, fnaddr);
5857 cxt = force_reg (SImode, cxt);
5859 if (TARGET_LARGE_MEM)
5861 rtx rotl = gen_reg_rtx (V4SImode);
5862 rtx mask = gen_reg_rtx (V4SImode);
5863 rtx bi = gen_reg_rtx (SImode);
5864 static unsigned char const shufa[16] = {
5865 2, 3, 0, 1, 18, 19, 16, 17,
5866 0, 1, 2, 3, 16, 17, 18, 19
5868 static unsigned char const insna[16] = {
5869 0x41, 0, 0, 79,
5870 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5871 0x60, 0x80, 0, 79,
5872 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5875 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5876 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5878 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5879 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5880 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5881 emit_insn (gen_selb (insn, insnc, rotl, mask));
5883 mem = adjust_address (m_tramp, V4SImode, 0);
5884 emit_move_insn (mem, insn);
5886 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5887 mem = adjust_address (m_tramp, Pmode, 16);
5888 emit_move_insn (mem, bi);
5890 else
5892 rtx scxt = gen_reg_rtx (SImode);
5893 rtx sfnaddr = gen_reg_rtx (SImode);
5894 static unsigned char const insna[16] = {
5895 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5896 0x30, 0, 0, 0,
5897 0, 0, 0, 0,
5898 0, 0, 0, 0
5901 shufc = gen_reg_rtx (TImode);
5902 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5904 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5905 fits 18 bits and the last 4 are zeros. This will be true if
5906 the stack pointer is initialized to 0x3fff0 at program start;
5907 otherwise the ila instruction will be garbage. */
5909 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5910 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5911 emit_insn (gen_cpat
5912 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5913 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5914 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5916 mem = adjust_address (m_tramp, V4SImode, 0);
5917 emit_move_insn (mem, insn);
5919 emit_insn (gen_sync ());
5922 static bool
5923 spu_warn_func_return (tree decl)
5925 /* Naked functions are implemented entirely in assembly, including the
5926 return sequence, so suppress warnings about this. */
5927 return !spu_naked_function_p (decl);
5930 void
5931 spu_expand_sign_extend (rtx ops[])
5933 unsigned char arr[16];
5934 rtx pat = gen_reg_rtx (TImode);
5935 rtx sign, c;
5936 int i, last;
5937 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5938 if (GET_MODE (ops[1]) == QImode)
5940 sign = gen_reg_rtx (HImode);
5941 emit_insn (gen_extendqihi2 (sign, ops[1]));
5942 for (i = 0; i < 16; i++)
5943 arr[i] = 0x12;
5944 arr[last] = 0x13;
5946 else
5948 for (i = 0; i < 16; i++)
5949 arr[i] = 0x10;
5950 switch (GET_MODE (ops[1]))
5952 case HImode:
5953 sign = gen_reg_rtx (SImode);
5954 emit_insn (gen_extendhisi2 (sign, ops[1]));
5955 arr[last] = 0x03;
5956 arr[last - 1] = 0x02;
5957 break;
5958 case SImode:
5959 sign = gen_reg_rtx (SImode);
5960 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5961 for (i = 0; i < 4; i++)
5962 arr[last - i] = 3 - i;
5963 break;
5964 case DImode:
5965 sign = gen_reg_rtx (SImode);
5966 c = gen_reg_rtx (SImode);
5967 emit_insn (gen_spu_convert (c, ops[1]));
5968 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5969 for (i = 0; i < 8; i++)
5970 arr[last - i] = 7 - i;
5971 break;
5972 default:
5973 abort ();
5976 emit_move_insn (pat, array_to_constant (TImode, arr));
5977 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5980 /* Expand vector initialization. If there are any constant parts,
5981 load the constant parts first, then load any non-constant parts. */
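/* Illustrative example: initializing a V4SI vector as { 1, x, 3, y }
   first loads the constant vector { 1, 1, 3, 1 } (empty slots are
   filled with the first constant so the recursive call may use a
   splat), then inserts the variable elements x and y into slots 1
   and 3.  */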
5982 void
5983 spu_expand_vector_init (rtx target, rtx vals)
5985 machine_mode mode = GET_MODE (target);
5986 int n_elts = GET_MODE_NUNITS (mode);
5987 int n_var = 0;
5988 bool all_same = true;
5989 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5990 int i;
5992 first = XVECEXP (vals, 0, 0);
5993 for (i = 0; i < n_elts; ++i)
5995 x = XVECEXP (vals, 0, i);
5996 if (!(CONST_INT_P (x)
5997 || GET_CODE (x) == CONST_DOUBLE
5998 || GET_CODE (x) == CONST_FIXED))
5999 ++n_var;
6000 else
6002 if (first_constant == NULL_RTX)
6003 first_constant = x;
6005 if (i > 0 && !rtx_equal_p (x, first))
6006 all_same = false;
6009 /* if all elements are the same, use splats to repeat elements */
6010 if (all_same)
6012 if (!CONSTANT_P (first)
6013 && !register_operand (first, GET_MODE (x)))
6014 first = force_reg (GET_MODE (first), first);
6015 emit_insn (gen_spu_splats (target, first));
6016 return;
6019 /* load constant parts */
6020 if (n_var != n_elts)
6022 if (n_var == 0)
6024 emit_move_insn (target,
6025 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6027 else
6029 rtx constant_parts_rtx = copy_rtx (vals);
6031 gcc_assert (first_constant != NULL_RTX);
6032 /* Fill empty slots with the first constant; this increases
6033 our chance of using splats in the recursive call below. */
6034 for (i = 0; i < n_elts; ++i)
6036 x = XVECEXP (constant_parts_rtx, 0, i);
6037 if (!(CONST_INT_P (x)
6038 || GET_CODE (x) == CONST_DOUBLE
6039 || GET_CODE (x) == CONST_FIXED))
6040 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6043 spu_expand_vector_init (target, constant_parts_rtx);
6047 /* load variable parts */
6048 if (n_var != 0)
6050 rtx insert_operands[4];
6052 insert_operands[0] = target;
6053 insert_operands[2] = target;
6054 for (i = 0; i < n_elts; ++i)
6056 x = XVECEXP (vals, 0, i);
6057 if (!(CONST_INT_P (x)
6058 || GET_CODE (x) == CONST_DOUBLE
6059 || GET_CODE (x) == CONST_FIXED))
6061 if (!register_operand (x, GET_MODE (x)))
6062 x = force_reg (GET_MODE (x), x);
6063 insert_operands[1] = x;
6064 insert_operands[3] = GEN_INT (i);
6065 spu_builtin_insert (insert_operands);
6071 /* Return the insn code for the vector compare instruction for the given
6072 CODE, DEST_MODE and OP_MODE. Return -1 if a valid insn is not available. */
6074 static int
6075 get_vec_cmp_insn (enum rtx_code code,
6076 machine_mode dest_mode,
6077 machine_mode op_mode)
6080 switch (code)
6082 case EQ:
6083 if (dest_mode == V16QImode && op_mode == V16QImode)
6084 return CODE_FOR_ceq_v16qi;
6085 if (dest_mode == V8HImode && op_mode == V8HImode)
6086 return CODE_FOR_ceq_v8hi;
6087 if (dest_mode == V4SImode && op_mode == V4SImode)
6088 return CODE_FOR_ceq_v4si;
6089 if (dest_mode == V4SImode && op_mode == V4SFmode)
6090 return CODE_FOR_ceq_v4sf;
6091 if (dest_mode == V2DImode && op_mode == V2DFmode)
6092 return CODE_FOR_ceq_v2df;
6093 break;
6094 case GT:
6095 if (dest_mode == V16QImode && op_mode == V16QImode)
6096 return CODE_FOR_cgt_v16qi;
6097 if (dest_mode == V8HImode && op_mode == V8HImode)
6098 return CODE_FOR_cgt_v8hi;
6099 if (dest_mode == V4SImode && op_mode == V4SImode)
6100 return CODE_FOR_cgt_v4si;
6101 if (dest_mode == V4SImode && op_mode == V4SFmode)
6102 return CODE_FOR_cgt_v4sf;
6103 if (dest_mode == V2DImode && op_mode == V2DFmode)
6104 return CODE_FOR_cgt_v2df;
6105 break;
6106 case GTU:
6107 if (dest_mode == V16QImode && op_mode == V16QImode)
6108 return CODE_FOR_clgt_v16qi;
6109 if (dest_mode == V8HImode && op_mode == V8HImode)
6110 return CODE_FOR_clgt_v8hi;
6111 if (dest_mode == V4SImode && op_mode == V4SImode)
6112 return CODE_FOR_clgt_v4si;
6113 break;
6114 default:
6115 break;
6117 return -1;
6120 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6121 DMODE is expected destination mode. This is a recursive function. */
6123 static rtx
6124 spu_emit_vector_compare (enum rtx_code rcode,
6125 rtx op0, rtx op1,
6126 machine_mode dmode)
6128 int vec_cmp_insn;
6129 rtx mask;
6130 machine_mode dest_mode;
6131 machine_mode op_mode = GET_MODE (op1);
6133 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6135 /* Floating point vector compare instructions use destination V4SImode.
6136 Double floating point vector compare instructions use destination V2DImode.
6137 Move the destination to the appropriate mode later. */
6138 if (dmode == V4SFmode)
6139 dest_mode = V4SImode;
6140 else if (dmode == V2DFmode)
6141 dest_mode = V2DImode;
6142 else
6143 dest_mode = dmode;
6145 mask = gen_reg_rtx (dest_mode);
6146 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6148 if (vec_cmp_insn == -1)
6150 bool swap_operands = false;
6151 bool try_again = false;
6152 switch (rcode)
6154 case LT:
6155 rcode = GT;
6156 swap_operands = true;
6157 try_again = true;
6158 break;
6159 case LTU:
6160 rcode = GTU;
6161 swap_operands = true;
6162 try_again = true;
6163 break;
6164 case NE:
6165 case UNEQ:
6166 case UNLE:
6167 case UNLT:
6168 case UNGE:
6169 case UNGT:
6170 case UNORDERED:
6171 /* Treat A != B as ~(A==B). */
6173 enum rtx_code rev_code;
6174 enum insn_code nor_code;
6175 rtx rev_mask;
6177 rev_code = reverse_condition_maybe_unordered (rcode);
6178 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6180 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6181 gcc_assert (nor_code != CODE_FOR_nothing);
6182 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6183 if (dmode != dest_mode)
6185 rtx temp = gen_reg_rtx (dest_mode);
6186 convert_move (temp, mask, 0);
6187 return temp;
6189 return mask;
6191 break;
6192 case GE:
6193 case GEU:
6194 case LE:
6195 case LEU:
6196 /* Try GT/GTU/LT/LTU OR EQ */
6198 rtx c_rtx, eq_rtx;
6199 enum insn_code ior_code;
6200 enum rtx_code new_code;
6202 switch (rcode)
6204 case GE: new_code = GT; break;
6205 case GEU: new_code = GTU; break;
6206 case LE: new_code = LT; break;
6207 case LEU: new_code = LTU; break;
6208 default:
6209 gcc_unreachable ();
6212 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6213 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6215 ior_code = optab_handler (ior_optab, dest_mode);
6216 gcc_assert (ior_code != CODE_FOR_nothing);
6217 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6218 if (dmode != dest_mode)
6220 rtx temp = gen_reg_rtx (dest_mode);
6221 convert_move (temp, mask, 0);
6222 return temp;
6224 return mask;
6226 break;
6227 case LTGT:
6228 /* Try LT OR GT */
6230 rtx lt_rtx, gt_rtx;
6231 enum insn_code ior_code;
6233 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6234 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6236 ior_code = optab_handler (ior_optab, dest_mode);
6237 gcc_assert (ior_code != CODE_FOR_nothing);
6238 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6239 if (dmode != dest_mode)
6241 rtx temp = gen_reg_rtx (dest_mode);
6242 convert_move (temp, mask, 0);
6243 return temp;
6245 return mask;
6247 break;
6248 case ORDERED:
6249 /* Implement as (A==A) & (B==B) */
6251 rtx a_rtx, b_rtx;
6252 enum insn_code and_code;
6254 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6255 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6257 and_code = optab_handler (and_optab, dest_mode);
6258 gcc_assert (and_code != CODE_FOR_nothing);
6259 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6260 if (dmode != dest_mode)
6262 rtx temp = gen_reg_rtx (dest_mode);
6263 convert_move (temp, mask, 0);
6264 return temp;
6266 return mask;
6268 break;
6269 default:
6270 gcc_unreachable ();
6273 /* You only get two chances. */
6274 if (try_again)
6275 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6277 gcc_assert (vec_cmp_insn != -1);
6279 if (swap_operands)
6281 rtx tmp;
6282 tmp = op0;
6283 op0 = op1;
6284 op1 = tmp;
6288 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6289 if (dmode != dest_mode)
6291 rtx temp = gen_reg_rtx (dest_mode);
6292 convert_move (temp, mask, 0);
6293 return temp;
6295 return mask;
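/* Editorial illustration (not part of the SPU backend): an element-wise sketch
   of the decompositions used above, producing the all-ones/all-zeros mask
   convention for a single double lane.  Function names are hypothetical; only
   EQ and GT are assumed to exist natively, as in the insn table above.  */
#include <stdint.h>

static uint64_t cmp_eq (double a, double b) { return a == b ? ~0ULL : 0; }
static uint64_t cmp_gt (double a, double b) { return a > b ? ~0ULL : 0; }

/* LT: swap operands and use GT.  */
static uint64_t cmp_lt (double a, double b) { return cmp_gt (b, a); }
/* NE: ~(A == B), as in the reverse-and-complement path.  */
static uint64_t cmp_ne (double a, double b) { return ~cmp_eq (a, b); }
/* GE: GT OR EQ.  */
static uint64_t cmp_ge (double a, double b) { return cmp_gt (a, b) | cmp_eq (a, b); }
/* LTGT: LT OR GT.  */
static uint64_t cmp_ltgt (double a, double b) { return cmp_lt (a, b) | cmp_gt (a, b); }
/* ORDERED: (A == A) & (B == B); false whenever either operand is NaN.  */
static uint64_t cmp_ordered (double a, double b) { return cmp_eq (a, a) & cmp_eq (b, b); }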
6299 /* Emit vector conditional expression.
6300 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6301 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6303 int
6304 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6305 rtx cond, rtx cc_op0, rtx cc_op1)
6307 machine_mode dest_mode = GET_MODE (dest);
6308 enum rtx_code rcode = GET_CODE (cond);
6309 rtx mask;
6311 /* Get the vector mask for the given relational operations. */
6312 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6314 emit_insn(gen_selb (dest, op2, op1, mask));
6316 return 1;
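/* Editorial illustration (not part of the SPU backend): the bitwise select
   performed by selb for one 32-bit lane, assuming MASK is all-ones where the
   comparison held and all-zeros elsewhere.  Where the mask is set, the first
   VEC_COND_EXPR operand is chosen; otherwise the second.  */
#include <stdint.h>

static uint32_t
select_lane_sketch (uint32_t op1, uint32_t op2, uint32_t mask)
{
  /* Equivalent to "mask ? op1 : op2" when mask is 0 or ~0.  */
  return (op1 & mask) | (op2 & ~mask);
}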
6319 static rtx
6320 spu_force_reg (machine_mode mode, rtx op)
6322 rtx x, r;
6323 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6325 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6326 || GET_MODE (op) == BLKmode)
6327 return force_reg (mode, convert_to_mode (mode, op, 0));
6328 abort ();
6331 r = force_reg (GET_MODE (op), op);
6332 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6334 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6335 if (x)
6336 return x;
6339 x = gen_reg_rtx (mode);
6340 emit_insn (gen_spu_convert (x, r));
6341 return x;
6344 static void
6345 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6347 HOST_WIDE_INT v = 0;
6348 int lsbits;
6349 /* Check the range of immediate operands. */
6350 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6352 int range = p - SPU_BTI_7;
6354 if (!CONSTANT_P (op))
6355 error ("%s expects an integer literal in the range [%d, %d]",
6356 d->name,
6357 spu_builtin_range[range].low, spu_builtin_range[range].high);
6359 if (GET_CODE (op) == CONST
6360 && (GET_CODE (XEXP (op, 0)) == PLUS
6361 || GET_CODE (XEXP (op, 0)) == MINUS))
6363 v = INTVAL (XEXP (XEXP (op, 0), 1));
6364 op = XEXP (XEXP (op, 0), 0);
6366 else if (GET_CODE (op) == CONST_INT)
6367 v = INTVAL (op);
6368 else if (GET_CODE (op) == CONST_VECTOR
6369 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6370 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6372 /* The default for v is 0 which is valid in every range. */
6373 if (v < spu_builtin_range[range].low
6374 || v > spu_builtin_range[range].high)
6375 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6376 d->name,
6377 spu_builtin_range[range].low, spu_builtin_range[range].high,
6378 v);
6380 switch (p)
6382 case SPU_BTI_S10_4:
6383 lsbits = 4;
6384 break;
6385 case SPU_BTI_U16_2:
6386 /* This is only used in lqa and stqa.  Even though the insns
6387 encode 16 bits of the address (all but the 2 least
6388 significant), only 14 bits are used because it is masked to
6389 be 16-byte aligned. */
6390 lsbits = 4;
6391 break;
6392 case SPU_BTI_S16_2:
6393 /* This is used for lqr and stqr. */
6394 lsbits = 2;
6395 break;
6396 default:
6397 lsbits = 0;
6400 if (GET_CODE (op) == LABEL_REF
6401 || (GET_CODE (op) == SYMBOL_REF
6402 && SYMBOL_REF_FUNCTION_P (op))
6403 || (v & ((1 << lsbits) - 1)) != 0)
6404 warning (0, "%d least significant bits of %s are ignored", lsbits,
6405 d->name);
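/* Editorial illustration (not part of the SPU backend): a sketch of the two
   numeric checks above for a hypothetical immediate value V -- a range check
   against [LOW, HIGH] and a check that the LSBITS least significant bits are
   zero.  In the code above the first failure is an error, the second only a
   warning; the sketch collapses both into a boolean.  */
#include <stdbool.h>

static bool
immediate_ok_sketch (long long v, long long low, long long high, int lsbits)
{
  if (v < low || v > high)
    return false;                        /* would be reported as an error */
  if ((v & ((1LL << lsbits) - 1)) != 0)
    return false;                        /* would only draw a warning */
  return true;
}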
6410 static int
6411 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6412 rtx target, rtx ops[])
6414 enum insn_code icode = (enum insn_code) d->icode;
6415 int i = 0, a;
6417 /* Expand the arguments into rtl. */
6419 if (d->parm[0] != SPU_BTI_VOID)
6420 ops[i++] = target;
6422 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6424 tree arg = CALL_EXPR_ARG (exp, a);
6425 if (arg == 0)
6426 abort ();
6427 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6430 gcc_assert (i == insn_data[icode].n_generator_args);
6431 return i;
6434 static rtx
6435 spu_expand_builtin_1 (struct spu_builtin_description *d,
6436 tree exp, rtx target)
6438 rtx pat;
6439 rtx ops[8];
6440 enum insn_code icode = (enum insn_code) d->icode;
6441 machine_mode mode, tmode;
6442 int i, p;
6443 int n_operands;
6444 tree return_type;
6446 /* Set up ops[] with values from arglist. */
6447 n_operands = expand_builtin_args (d, exp, target, ops);
6449 /* Handle the target operand which must be operand 0. */
6450 i = 0;
6451 if (d->parm[0] != SPU_BTI_VOID)
6454 /* We prefer the mode specified for the match_operand; otherwise,
6455 use the mode from the builtin function prototype. */
6456 tmode = insn_data[d->icode].operand[0].mode;
6457 if (tmode == VOIDmode)
6458 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6460 /* Try to use TARGET, because not using it can lead to extra copies,
6461 and when all of the registers are in use extra copies lead
6462 to extra spills. */
6463 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6464 ops[0] = target;
6465 else
6466 target = ops[0] = gen_reg_rtx (tmode);
6468 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6469 abort ();
6471 i++;
6474 if (d->fcode == SPU_MASK_FOR_LOAD)
6476 machine_mode mode = insn_data[icode].operand[1].mode;
6477 tree arg;
6478 rtx addr, op, pat;
6480 /* get addr */
6481 arg = CALL_EXPR_ARG (exp, 0);
6482 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6483 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6484 addr = memory_address (mode, op);
6486 /* negate addr */
6487 op = gen_reg_rtx (GET_MODE (addr));
6488 emit_insn (gen_rtx_SET (VOIDmode, op,
6489 gen_rtx_NEG (GET_MODE (addr), addr)));
6490 op = gen_rtx_MEM (mode, op);
6492 pat = GEN_FCN (icode) (target, op);
6493 if (!pat)
6494 return 0;
6495 emit_insn (pat);
6496 return target;
6499 /* Ignore align_hint, but still expand its args in case they have
6500 side effects. */
6501 if (icode == CODE_FOR_spu_align_hint)
6502 return 0;
6504 /* Handle the rest of the operands. */
6505 for (p = 1; i < n_operands; i++, p++)
6507 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6508 mode = insn_data[d->icode].operand[i].mode;
6509 else
6510 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6512 /* mode can be VOIDmode here for labels */
6514 /* For specific intrinsics with an immediate operand, e.g.,
6515 si_ai(), we sometimes need to convert the scalar argument to a
6516 vector argument by splatting the scalar. */
6517 if (VECTOR_MODE_P (mode)
6518 && (GET_CODE (ops[i]) == CONST_INT
6519 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6520 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6522 if (GET_CODE (ops[i]) == CONST_INT)
6523 ops[i] = spu_const (mode, INTVAL (ops[i]));
6524 else
6526 rtx reg = gen_reg_rtx (mode);
6527 machine_mode imode = GET_MODE_INNER (mode);
6528 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6529 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6530 if (imode != GET_MODE (ops[i]))
6531 ops[i] = convert_to_mode (imode, ops[i],
6532 TYPE_UNSIGNED (spu_builtin_types
6533 [d->parm[i]]));
6534 emit_insn (gen_spu_splats (reg, ops[i]));
6535 ops[i] = reg;
6539 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6541 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6542 ops[i] = spu_force_reg (mode, ops[i]);
6545 switch (n_operands)
6547 case 0:
6548 pat = GEN_FCN (icode) (0);
6549 break;
6550 case 1:
6551 pat = GEN_FCN (icode) (ops[0]);
6552 break;
6553 case 2:
6554 pat = GEN_FCN (icode) (ops[0], ops[1]);
6555 break;
6556 case 3:
6557 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6558 break;
6559 case 4:
6560 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6561 break;
6562 case 5:
6563 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6564 break;
6565 case 6:
6566 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6567 break;
6568 default:
6569 abort ();
6572 if (!pat)
6573 abort ();
6575 if (d->type == B_CALL || d->type == B_BISLED)
6576 emit_call_insn (pat);
6577 else if (d->type == B_JUMP)
6579 emit_jump_insn (pat);
6580 emit_barrier ();
6582 else
6583 emit_insn (pat);
6585 return_type = spu_builtin_types[d->parm[0]];
6586 if (d->parm[0] != SPU_BTI_VOID
6587 && GET_MODE (target) != TYPE_MODE (return_type))
6589 /* TARGET is the return value.  It should always have the mode of
6590 the builtin function prototype. */
6591 target = spu_force_reg (TYPE_MODE (return_type), target);
6594 return target;
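/* Editorial illustration (not part of the SPU backend): the scalar-to-vector
   conversion above for a hypothetical intrinsic expecting an 8 x 16-bit
   vector operand -- the scalar is first converted to the element mode and
   then replicated ("splatted") into every lane.  Names are illustrative.  */
#include <stdint.h>

static void
splat_scalar_sketch (int16_t vec[8], int scalar)
{
  int16_t elt = (int16_t) scalar;   /* the convert_to_mode step */
  for (int i = 0; i < 8; i++)       /* the gen_spu_splats step */
    vec[i] = elt;
}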
6597 rtx
6598 spu_expand_builtin (tree exp,
6599 rtx target,
6600 rtx subtarget ATTRIBUTE_UNUSED,
6601 machine_mode mode ATTRIBUTE_UNUSED,
6602 int ignore ATTRIBUTE_UNUSED)
6604 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6605 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6606 struct spu_builtin_description *d;
6608 if (fcode < NUM_SPU_BUILTINS)
6610 d = &spu_builtins[fcode];
6612 return spu_expand_builtin_1 (d, exp, target);
6614 abort ();
6617 /* Implement targetm.vectorize.builtin_mask_for_load. */
6618 static tree
6619 spu_builtin_mask_for_load (void)
6621 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6624 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6625 static int
6626 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6627 tree vectype,
6628 int misalign ATTRIBUTE_UNUSED)
6630 unsigned elements;
6632 switch (type_of_cost)
6634 case scalar_stmt:
6635 case vector_stmt:
6636 case vector_load:
6637 case vector_store:
6638 case vec_to_scalar:
6639 case scalar_to_vec:
6640 case cond_branch_not_taken:
6641 case vec_perm:
6642 case vec_promote_demote:
6643 return 1;
6645 case scalar_store:
6646 return 10;
6648 case scalar_load:
6649 /* Load + rotate. */
6650 return 2;
6652 case unaligned_load:
6653 return 2;
6655 case cond_branch_taken:
6656 return 6;
6658 case vec_construct:
6659 elements = TYPE_VECTOR_SUBPARTS (vectype);
6660 return elements / 2 + 1;
6662 default:
6663 gcc_unreachable ();
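/* Editorial illustration (not part of the SPU backend): the vec_construct
   cost above works out to elements/2 + 1, e.g. 3 for a 4-element vector,
   5 for 8 elements and 9 for 16 elements.  */
static int
vec_construct_cost_sketch (unsigned elements)
{
  return elements / 2 + 1;
}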
6667 /* Implement targetm.vectorize.init_cost. */
6669 static void *
6670 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6672 unsigned *cost = XNEWVEC (unsigned, 3);
6673 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6674 return cost;
6677 /* Implement targetm.vectorize.add_stmt_cost. */
6679 static unsigned
6680 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6681 struct _stmt_vec_info *stmt_info, int misalign,
6682 enum vect_cost_model_location where)
6684 unsigned *cost = (unsigned *) data;
6685 unsigned retval = 0;
6687 if (flag_vect_cost_model)
6689 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6690 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6692 /* Statements in an inner loop relative to the loop being
6693 vectorized are weighted more heavily. The value here is
6694 arbitrary and could potentially be improved with analysis. */
6695 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6696 count *= 50; /* FIXME. */
6698 retval = (unsigned) (count * stmt_cost);
6699 cost[where] += retval;
6702 return retval;
6705 /* Implement targetm.vectorize.finish_cost. */
6707 static void
6708 spu_finish_cost (void *data, unsigned *prologue_cost,
6709 unsigned *body_cost, unsigned *epilogue_cost)
6711 unsigned *cost = (unsigned *) data;
6712 *prologue_cost = cost[vect_prologue];
6713 *body_cost = cost[vect_body];
6714 *epilogue_cost = cost[vect_epilogue];
6717 /* Implement targetm.vectorize.destroy_cost_data. */
6719 static void
6720 spu_destroy_cost_data (void *data)
6722 free (data);
6725 /* Return true iff a data reference of type TYPE can reach vector
6726 alignment (16) after applying N iterations.  This routine does not
6727 determine how many iterations are required to reach the desired alignment. */
6729 static bool
6730 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6732 if (is_packed)
6733 return false;
6735 /* All other types are naturally aligned. */
6736 return true;
6739 /* Return the appropriate mode for a named address pointer. */
6740 static machine_mode
6741 spu_addr_space_pointer_mode (addr_space_t addrspace)
6743 switch (addrspace)
6745 case ADDR_SPACE_GENERIC:
6746 return ptr_mode;
6747 case ADDR_SPACE_EA:
6748 return EAmode;
6749 default:
6750 gcc_unreachable ();
6754 /* Return the appropriate mode for an address in a named address space. */
6755 static machine_mode
6756 spu_addr_space_address_mode (addr_space_t addrspace)
6758 switch (addrspace)
6760 case ADDR_SPACE_GENERIC:
6761 return Pmode;
6762 case ADDR_SPACE_EA:
6763 return EAmode;
6764 default:
6765 gcc_unreachable ();
6769 /* Determine if one named address space is a subset of another. */
6771 static bool
6772 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6774 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6775 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6777 if (subset == superset)
6778 return true;
6780 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6781 being subsets but instead as disjoint address spaces. */
6782 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6783 return false;
6785 else
6786 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6789 /* Convert from one address space to another. */
6790 static rtx
6791 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6793 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6794 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6796 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6797 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6799 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6801 rtx result, ls;
6803 ls = gen_const_mem (DImode,
6804 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6805 set_mem_align (ls, 128);
6807 result = gen_reg_rtx (Pmode);
6808 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6809 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6810 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6811 ls, const0_rtx, Pmode, 1);
6813 emit_insn (gen_subsi3 (result, op, ls));
6815 return result;
6818 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6820 rtx result, ls;
6822 ls = gen_const_mem (DImode,
6823 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6824 set_mem_align (ls, 128);
6826 result = gen_reg_rtx (EAmode);
6827 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6828 op = force_reg (Pmode, op);
6829 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6830 ls, const0_rtx, EAmode, 1);
6831 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6833 if (EAmode == SImode)
6834 emit_insn (gen_addsi3 (result, op, ls));
6835 else
6836 emit_insn (gen_adddi3 (result, op, ls));
6838 return result;
6841 else
6842 gcc_unreachable ();
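/* Editorial illustration (not part of the SPU backend): the arithmetic the two
   conversion directions above implement, assuming a hypothetical 64-bit __ea
   space and a 32-bit local store whose base address in the __ea space is
   published in __ea_local_store.  The conditional moves in the code above
   preserve NULL in both directions, modeled here by the != 0 checks.  */
#include <stdint.h>

static uint32_t
ea_to_generic_sketch (uint64_t ea, uint64_t ea_local_store)
{
  uint64_t base = ea != 0 ? ea_local_store : 0;   /* keep NULL as NULL */
  return (uint32_t) (ea - base);                  /* subtract the LS base */
}

static uint64_t
generic_to_ea_sketch (uint32_t ls, uint64_t ea_local_store)
{
  uint64_t base = ls != 0 ? ea_local_store : 0;   /* keep NULL as NULL */
  return (uint64_t) ls + base;                    /* add the LS base */
}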
6846 /* Count the total number of instructions in each pipe and return the
6847 maximum, which is used as the Minimum Iteration Interval (MII)
6848 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6849 A value of -2 marks instructions that can go in either pipe0 or pipe1. */
6850 static int
6851 spu_sms_res_mii (struct ddg *g)
6853 int i;
6854 unsigned t[4] = {0, 0, 0, 0};
6856 for (i = 0; i < g->num_nodes; i++)
6858 rtx_insn *insn = g->nodes[i].insn;
6859 int p = get_pipe (insn) + 2;
6861 gcc_assert (p >= 0);
6862 gcc_assert (p < 4);
6864 t[p]++;
6865 if (dump_file && INSN_P (insn))
6866 fprintf (dump_file, "i%d %s %d %d\n",
6867 INSN_UID (insn),
6868 insn_data[INSN_CODE(insn)].name,
6869 p, t[p]);
6871 if (dump_file)
6872 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6874 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
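/* Editorial illustration (not part of the SPU backend): the MII formula above
   in isolation.  t0 counts instructions get_pipe () reports as -2, i.e. those
   that can go in either pipe (per the comment above); t2 and t3 presumably
   count the two pipes, and t[1] is unused by the formula.  */
static int
mii_sketch (unsigned t0, unsigned t2, unsigned t3)
{
  unsigned dual_issue = (t0 + t2 + t3 + 1) / 2;   /* two issue slots per cycle */
  unsigned per_pipe = t2 > t3 ? t2 : t3;          /* each pipe issues one per cycle */
  return dual_issue > per_pipe ? dual_issue : per_pipe;
}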
6878 void
6879 spu_init_expanders (void)
6881 if (cfun)
6883 rtx r0, r1;
6884 /* The hard frame pointer register is only 128-bit aligned when
6885 frame_pointer_needed is true.  We don't know that until we're
6886 expanding the prologue. */
6887 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6889 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6890 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6891 to be treated as aligned, so generate them here. */
6892 r0 = gen_reg_rtx (SImode);
6893 r1 = gen_reg_rtx (SImode);
6894 mark_reg_pointer (r0, 128);
6895 mark_reg_pointer (r1, 128);
6896 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6897 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6901 static machine_mode
6902 spu_libgcc_cmp_return_mode (void)
6905 /* For the SPU, word_mode is TImode, so it is better to use SImode
6906 for compare returns. */
6907 return SImode;
6910 static machine_mode
6911 spu_libgcc_shift_count_mode (void)
6913 /* For the SPU, word_mode is TImode, so it is better to use SImode
6914 for shift counts. */
6915 return SImode;
6918 /* Implement targetm.section_type_flags. */
6919 static unsigned int
6920 spu_section_type_flags (tree decl, const char *name, int reloc)
6922 /* .toe needs to have type @nobits. */
6923 if (strcmp (name, ".toe") == 0)
6924 return SECTION_BSS;
6925 /* Don't load _ea into the current address space. */
6926 if (strcmp (name, "._ea") == 0)
6927 return SECTION_WRITE | SECTION_DEBUG;
6928 return default_section_type_flags (decl, name, reloc);
6931 /* Implement targetm.select_section. */
6932 static section *
6933 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6935 /* Variables and constants defined in the __ea address space
6936 go into a special section named "._ea". */
6937 if (TREE_TYPE (decl) != error_mark_node
6938 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6940 /* We might get called with string constants, but get_named_section
6941 doesn't like them as they are not DECLs. Also, we need to set
6942 flags in that case. */
6943 if (!DECL_P (decl))
6944 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6946 return get_named_section (decl, "._ea", reloc);
6949 return default_elf_select_section (decl, reloc, align);
6952 /* Implement targetm.unique_section. */
6953 static void
6954 spu_unique_section (tree decl, int reloc)
6956 /* We don't support unique section names in the __ea address
6957 space for now. */
6958 if (TREE_TYPE (decl) != error_mark_node
6959 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6960 return;
6962 default_unique_section (decl, reloc);
6965 /* Generate a constant or register which contains 2^SCALE. We assume
6966 the result is valid for MODE. Currently, MODE must be V4SFmode and
6967 SCALE must be SImode. */
6968 rtx
6969 spu_gen_exp2 (machine_mode mode, rtx scale)
6971 gcc_assert (mode == V4SFmode);
6972 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6973 if (GET_CODE (scale) != CONST_INT)
6975 /* unsigned int exp = (127 + scale) << 23;
6976 __vector float m = (__vector float) spu_splats (exp); */
6977 rtx reg = force_reg (SImode, scale);
6978 rtx exp = gen_reg_rtx (SImode);
6979 rtx mul = gen_reg_rtx (mode);
6980 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6981 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6982 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6983 return mul;
6985 else
6987 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6988 unsigned char arr[16];
6989 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6990 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6991 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6992 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6993 return array_to_constant (mode, arr);
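/* Editorial illustration (not part of the SPU backend): the value built above
   is 2^scale encoded directly in the IEEE single-precision exponent field,
   i.e. (127 + scale) << 23 reinterpreted as a float, which is what the byte
   pattern splatted into each lane represents.  */
#include <stdint.h>
#include <string.h>

static float
exp2_sketch (int scale)
{
  uint32_t bits = (uint32_t) (127 + scale) << 23;  /* exponent field only */
  float f;
  memcpy (&f, &bits, sizeof f);                    /* bit-cast to float */
  return f;
}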
6997 /* After reload, just change the convert into a move instruction
6998 or a dead instruction. */
6999 void
7000 spu_split_convert (rtx ops[])
7002 if (REGNO (ops[0]) == REGNO (ops[1]))
7003 emit_note (NOTE_INSN_DELETED);
7004 else
7006 /* Use TImode always as this might help hard reg copyprop. */
7007 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7008 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7009 emit_insn (gen_move_insn (op0, op1));
7013 void
7014 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7016 fprintf (file, "# profile\n");
7017 fprintf (file, "brsl $75, _mcount\n");
7020 /* Implement targetm.ref_may_alias_errno. */
7021 static bool
7022 spu_ref_may_alias_errno (ao_ref *ref)
7024 tree base = ao_ref_base (ref);
7026 /* With SPU newlib, errno is defined as something like
7027 _impure_data._errno
7028 The default implementation of this target macro does not
7029 recognize such expressions, so special-case it here. */
7031 if (TREE_CODE (base) == VAR_DECL
7032 && !TREE_STATIC (base)
7033 && DECL_EXTERNAL (base)
7034 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7035 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7036 "_impure_data") == 0
7037 /* _errno is the first member of _impure_data. */
7038 && ref->offset == 0)
7039 return true;
7041 return default_ref_may_alias_errno (ref);
7044 /* Output thunk to FILE that implements a C++ virtual function call (with
7045 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7046 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7047 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7048 relative to the resulting this pointer. */
7050 static void
7051 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7052 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7053 tree function)
7055 rtx op[8];
7057 /* Make sure unwind info is emitted for the thunk if needed. */
7058 final_start_function (emit_barrier (), file, 1);
7060 /* Operand 0 is the target function. */
7061 op[0] = XEXP (DECL_RTL (function), 0);
7063 /* Operand 1 is the 'this' pointer. */
7064 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7065 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7066 else
7067 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7069 /* Operands 2/3 are the low/high halfwords of delta. */
7070 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7071 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7073 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7074 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7075 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7077 /* Operands 6/7 are temporary registers. */
7078 op[6] = gen_rtx_REG (Pmode, 79);
7079 op[7] = gen_rtx_REG (Pmode, 78);
7081 /* Add DELTA to this pointer. */
7082 if (delta)
7084 if (delta >= -0x200 && delta < 0x200)
7085 output_asm_insn ("ai\t%1,%1,%2", op);
7086 else if (delta >= -0x8000 && delta < 0x8000)
7088 output_asm_insn ("il\t%6,%2", op);
7089 output_asm_insn ("a\t%1,%1,%6", op);
7091 else
7093 output_asm_insn ("ilhu\t%6,%3", op);
7094 output_asm_insn ("iohl\t%6,%2", op);
7095 output_asm_insn ("a\t%1,%1,%6", op);
7099 /* Perform vcall adjustment. */
7100 if (vcall_offset)
7102 output_asm_insn ("lqd\t%7,0(%1)", op);
7103 output_asm_insn ("rotqby\t%7,%7,%1", op);
7105 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7106 output_asm_insn ("ai\t%7,%7,%4", op);
7107 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7109 output_asm_insn ("il\t%6,%4", op);
7110 output_asm_insn ("a\t%7,%7,%6", op);
7112 else
7114 output_asm_insn ("ilhu\t%6,%5", op);
7115 output_asm_insn ("iohl\t%6,%4", op);
7116 output_asm_insn ("a\t%7,%7,%6", op);
7119 output_asm_insn ("lqd\t%6,0(%7)", op);
7120 output_asm_insn ("rotqby\t%6,%6,%7", op);
7121 output_asm_insn ("a\t%1,%1,%6", op);
7124 /* Jump to target. */
7125 output_asm_insn ("br\t%0", op);
7127 final_end_function ();
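/* Editorial illustration (not part of the SPU backend): the pointer adjustment
   the emitted thunk performs before branching to FUNCTION, written as plain C.
   Names and types here are hypothetical; on the SPU the loads are done with
   lqd/rotqby as shown above.  */
#include <stddef.h>

static void *
thunk_adjust_sketch (void *this_ptr, ptrdiff_t delta, ptrdiff_t vcall_offset)
{
  char *p = (char *) this_ptr + delta;          /* add DELTA */
  if (vcall_offset)
    {
      /* The vtable pointer sits at offset 0 of the adjusted object; add the
         adjustment value stored at VCALL_OFFSET inside the vtable.  */
      char *vtable = *(char **) p;
      p += *(ptrdiff_t *) (vtable + vcall_offset);
    }
  return p;
}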
7130 /* Canonicalize a comparison from one we don't have to one we do have. */
7131 static void
7132 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7133 bool op0_preserve_value)
7135 if (!op0_preserve_value
7136 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7138 rtx tem = *op0;
7139 *op0 = *op1;
7140 *op1 = tem;
7141 *code = (int)swap_condition ((enum rtx_code)*code);
7145 /* Table of machine attributes. */
7146 static const struct attribute_spec spu_attribute_table[] =
7148 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7149 affects_type_identity } */
7150 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7151 false },
7152 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7153 false },
7154 { NULL, 0, 0, false, false, false, NULL, false }
7157 /* TARGET overrides. */
7159 #undef TARGET_ADDR_SPACE_POINTER_MODE
7160 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7162 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7163 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7165 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7166 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7167 spu_addr_space_legitimate_address_p
7169 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7170 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7172 #undef TARGET_ADDR_SPACE_SUBSET_P
7173 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7175 #undef TARGET_ADDR_SPACE_CONVERT
7176 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7178 #undef TARGET_INIT_BUILTINS
7179 #define TARGET_INIT_BUILTINS spu_init_builtins
7180 #undef TARGET_BUILTIN_DECL
7181 #define TARGET_BUILTIN_DECL spu_builtin_decl
7183 #undef TARGET_EXPAND_BUILTIN
7184 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7186 #undef TARGET_UNWIND_WORD_MODE
7187 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7189 #undef TARGET_LEGITIMIZE_ADDRESS
7190 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7192 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7193 and .quad for the debugger. When it is known that the assembler is fixed,
7194 these can be removed. */
7195 #undef TARGET_ASM_UNALIGNED_SI_OP
7196 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7198 #undef TARGET_ASM_ALIGNED_DI_OP
7199 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7201 /* The .8byte directive doesn't seem to work well for a 32-bit
7202 architecture. */
7203 #undef TARGET_ASM_UNALIGNED_DI_OP
7204 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7206 #undef TARGET_RTX_COSTS
7207 #define TARGET_RTX_COSTS spu_rtx_costs
7209 #undef TARGET_ADDRESS_COST
7210 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7212 #undef TARGET_SCHED_ISSUE_RATE
7213 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7215 #undef TARGET_SCHED_INIT_GLOBAL
7216 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7218 #undef TARGET_SCHED_INIT
7219 #define TARGET_SCHED_INIT spu_sched_init
7221 #undef TARGET_SCHED_VARIABLE_ISSUE
7222 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7224 #undef TARGET_SCHED_REORDER
7225 #define TARGET_SCHED_REORDER spu_sched_reorder
7227 #undef TARGET_SCHED_REORDER2
7228 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7230 #undef TARGET_SCHED_ADJUST_COST
7231 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7233 #undef TARGET_ATTRIBUTE_TABLE
7234 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7236 #undef TARGET_ASM_INTEGER
7237 #define TARGET_ASM_INTEGER spu_assemble_integer
7239 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7240 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7242 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7243 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7245 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7246 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7248 #undef TARGET_ASM_GLOBALIZE_LABEL
7249 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7251 #undef TARGET_PASS_BY_REFERENCE
7252 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7254 #undef TARGET_FUNCTION_ARG
7255 #define TARGET_FUNCTION_ARG spu_function_arg
7257 #undef TARGET_FUNCTION_ARG_ADVANCE
7258 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7260 #undef TARGET_MUST_PASS_IN_STACK
7261 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7263 #undef TARGET_BUILD_BUILTIN_VA_LIST
7264 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7266 #undef TARGET_EXPAND_BUILTIN_VA_START
7267 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7269 #undef TARGET_SETUP_INCOMING_VARARGS
7270 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7272 #undef TARGET_MACHINE_DEPENDENT_REORG
7273 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7275 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7276 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7278 #undef TARGET_INIT_LIBFUNCS
7279 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7281 #undef TARGET_RETURN_IN_MEMORY
7282 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7284 #undef TARGET_ENCODE_SECTION_INFO
7285 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7287 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7288 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7290 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7291 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7293 #undef TARGET_VECTORIZE_INIT_COST
7294 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7296 #undef TARGET_VECTORIZE_ADD_STMT_COST
7297 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7299 #undef TARGET_VECTORIZE_FINISH_COST
7300 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7302 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7303 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7305 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7306 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7308 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7309 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7311 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7312 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7314 #undef TARGET_SCHED_SMS_RES_MII
7315 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7317 #undef TARGET_SECTION_TYPE_FLAGS
7318 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7320 #undef TARGET_ASM_SELECT_SECTION
7321 #define TARGET_ASM_SELECT_SECTION spu_select_section
7323 #undef TARGET_ASM_UNIQUE_SECTION
7324 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7326 #undef TARGET_LEGITIMATE_ADDRESS_P
7327 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7329 #undef TARGET_LEGITIMATE_CONSTANT_P
7330 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7332 #undef TARGET_TRAMPOLINE_INIT
7333 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7335 #undef TARGET_WARN_FUNC_RETURN
7336 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7338 #undef TARGET_OPTION_OVERRIDE
7339 #define TARGET_OPTION_OVERRIDE spu_option_override
7341 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7342 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7344 #undef TARGET_REF_MAY_ALIAS_ERRNO
7345 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7347 #undef TARGET_ASM_OUTPUT_MI_THUNK
7348 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7349 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7350 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7352 /* Variable tracking should be run after all optimizations that
7353 change the order of insns.  It also needs a valid CFG. */
7354 #undef TARGET_DELAY_VARTRACK
7355 #define TARGET_DELAY_VARTRACK true
7357 #undef TARGET_CANONICALIZE_COMPARISON
7358 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7360 #undef TARGET_CAN_USE_DOLOOP_P
7361 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7363 struct gcc_target targetm = TARGET_INITIALIZER;
7365 #include "gt-spu.h"