/* gcc/config/spu/spu.c */
1 /* Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "cfghooks.h"
22 #include "tree.h"
23 #include "gimple.h"
24 #include "rtl.h"
25 #include "df.h"
26 #include "regs.h"
27 #include "insn-config.h"
28 #include "conditions.h"
29 #include "insn-attr.h"
30 #include "flags.h"
31 #include "recog.h"
32 #include "alias.h"
33 #include "fold-const.h"
34 #include "stringpool.h"
35 #include "stor-layout.h"
36 #include "calls.h"
37 #include "varasm.h"
38 #include "expmed.h"
39 #include "dojump.h"
40 #include "explow.h"
41 #include "emit-rtl.h"
42 #include "stmt.h"
43 #include "expr.h"
44 #include "insn-codes.h"
45 #include "optabs.h"
46 #include "except.h"
47 #include "output.h"
48 #include "cfgrtl.h"
49 #include "cfganal.h"
50 #include "lcm.h"
51 #include "cfgbuild.h"
52 #include "cfgcleanup.h"
53 #include "diagnostic-core.h"
54 #include "tm_p.h"
55 #include "target.h"
56 #include "langhooks.h"
57 #include "reload.h"
58 #include "sched-int.h"
59 #include "params.h"
60 #include "internal-fn.h"
61 #include "gimple-fold.h"
62 #include "tree-eh.h"
63 #include "gimplify.h"
64 #include "tm-constrs.h"
65 #include "ddg.h"
66 #include "timevar.h"
67 #include "dumpfile.h"
68 #include "cfgloop.h"
69 #include "builtins.h"
70 #include "rtl-iter.h"
72 /* This file should be included last. */
73 #include "target-def.h"
75 /* Builtin types, data and prototypes. */
77 enum spu_builtin_type_index
79 SPU_BTI_END_OF_PARAMS,
81 /* We create new type nodes for these. */
82 SPU_BTI_V16QI,
83 SPU_BTI_V8HI,
84 SPU_BTI_V4SI,
85 SPU_BTI_V2DI,
86 SPU_BTI_V4SF,
87 SPU_BTI_V2DF,
88 SPU_BTI_UV16QI,
89 SPU_BTI_UV8HI,
90 SPU_BTI_UV4SI,
91 SPU_BTI_UV2DI,
93 /* A 16-byte type. (Implemented with V16QI_type_node) */
94 SPU_BTI_QUADWORD,
96 /* These all correspond to intSI_type_node */
97 SPU_BTI_7,
98 SPU_BTI_S7,
99 SPU_BTI_U7,
100 SPU_BTI_S10,
101 SPU_BTI_S10_4,
102 SPU_BTI_U14,
103 SPU_BTI_16,
104 SPU_BTI_S16,
105 SPU_BTI_S16_2,
106 SPU_BTI_U16,
107 SPU_BTI_U16_2,
108 SPU_BTI_U18,
110 /* These correspond to the standard types */
111 SPU_BTI_INTQI,
112 SPU_BTI_INTHI,
113 SPU_BTI_INTSI,
114 SPU_BTI_INTDI,
116 SPU_BTI_UINTQI,
117 SPU_BTI_UINTHI,
118 SPU_BTI_UINTSI,
119 SPU_BTI_UINTDI,
121 SPU_BTI_FLOAT,
122 SPU_BTI_DOUBLE,
124 SPU_BTI_VOID,
125 SPU_BTI_PTR,
127 SPU_BTI_MAX
130 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
131 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
132 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
133 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
134 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
135 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
136 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
137 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
138 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
139 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
141 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
143 struct spu_builtin_range
145 int low, high;
148 static struct spu_builtin_range spu_builtin_range[] = {
149 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
150 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
151 {0ll, 0x7fll}, /* SPU_BTI_U7 */
152 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
153 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
154 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
155 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
156 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
157 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
158 {0ll, 0xffffll}, /* SPU_BTI_U16 */
159 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
160 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
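/* Editorial note: the entries above parallel the SPU_BTI_7 .. SPU_BTI_U18
   enumerators, so e.g. SPU_BTI_S10 accepts the signed 10-bit range
   -0x200 .. 0x1ff.  */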
164 /* Target specific attribute specifications. */
165 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
167 /* Prototypes and external defs. */
168 static int get_pipe (rtx_insn *insn);
169 static int spu_naked_function_p (tree func);
170 static int mem_is_padded_component_ref (rtx x);
171 static void fix_range (const char *);
172 static rtx spu_expand_load (rtx, rtx, rtx, int);
174 /* Which instruction set architecture to use. */
175 int spu_arch;
176 /* Which cpu are we tuning for. */
177 int spu_tune;
 179 /* The hardware requires 8 insns between a hint and the branch it
 180    affects.  This variable describes how many rtl instructions the
 181    compiler needs to see before inserting a hint, and then the compiler
 182    will insert enough nops to make it at least 8 insns.  The default is
 183    for the compiler to allow up to 2 nops to be emitted.  The nops are
 184    inserted in pairs, so we round down. */
185 int spu_hint_dist = (8*4) - (2*4);
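/* Editorial note: the arithmetic above is in bytes: 8 insns * 4 bytes = 32,
   minus 2 nops * 4 bytes = 8, giving the default hint distance of 24 bytes
   (6 insns).  spu_option_override recomputes this from spu_max_nops.  */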
187 enum spu_immediate {
188 SPU_NONE,
189 SPU_IL,
190 SPU_ILA,
191 SPU_ILH,
192 SPU_ILHU,
193 SPU_ORI,
194 SPU_ORHI,
195 SPU_ORBI,
196 SPU_IOHL
198 enum immediate_class
200 IC_POOL, /* constant pool */
201 IC_IL1, /* one il* instruction */
202 IC_IL2, /* both ilhu and iohl instructions */
203 IC_IL1s, /* one il* instruction */
204 IC_IL2s, /* both ilhu and iohl instructions */
205 IC_FSMBI, /* the fsmbi instruction */
206 IC_CPAT, /* one of the c*d instructions */
207 IC_FSMBI2 /* fsmbi plus 1 other instruction */
210 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
211 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
212 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
213 static enum immediate_class classify_immediate (rtx op,
214 machine_mode mode);
216 /* Pointer mode for __ea references. */
217 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
220 /* Define the structure for the machine field in struct function. */
221 struct GTY(()) machine_function
223 /* Register to use for PIC accesses. */
224 rtx pic_reg;
227 /* How to allocate a 'struct machine_function'. */
228 static struct machine_function *
229 spu_init_machine_status (void)
231 return ggc_cleared_alloc<machine_function> ();
234 /* Implement TARGET_OPTION_OVERRIDE. */
235 static void
236 spu_option_override (void)
238 /* Set up function hooks. */
239 init_machine_status = spu_init_machine_status;
 241 /* Small loops will be completely peeled (unrolled) at -O3.  For SPU it
 242    is more important to keep code small by default. */
243 if (!flag_unroll_loops && !flag_peel_loops)
244 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
245 global_options.x_param_values,
246 global_options_set.x_param_values);
248 flag_omit_frame_pointer = 1;
 250 /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
251 if (align_functions < 8)
252 align_functions = 8;
254 spu_hint_dist = 8*4 - spu_max_nops*4;
255 if (spu_hint_dist < 0)
256 spu_hint_dist = 0;
258 if (spu_fixed_range_string)
259 fix_range (spu_fixed_range_string);
261 /* Determine processor architectural level. */
262 if (spu_arch_string)
264 if (strcmp (&spu_arch_string[0], "cell") == 0)
265 spu_arch = PROCESSOR_CELL;
266 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
267 spu_arch = PROCESSOR_CELLEDP;
268 else
269 error ("bad value (%s) for -march= switch", spu_arch_string);
272 /* Determine processor to tune for. */
273 if (spu_tune_string)
275 if (strcmp (&spu_tune_string[0], "cell") == 0)
276 spu_tune = PROCESSOR_CELL;
277 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
278 spu_tune = PROCESSOR_CELLEDP;
279 else
280 error ("bad value (%s) for -mtune= switch", spu_tune_string);
283 /* Change defaults according to the processor architecture. */
284 if (spu_arch == PROCESSOR_CELLEDP)
286 /* If no command line option has been otherwise specified, change
287 the default to -mno-safe-hints on celledp -- only the original
288 Cell/B.E. processors require this workaround. */
289 if (!(target_flags_explicit & MASK_SAFE_HINTS))
290 target_flags &= ~MASK_SAFE_HINTS;
293 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
296 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
297 struct attribute_spec.handler. */
299 /* True if MODE is valid for the target. By "valid", we mean able to
300 be manipulated in non-trivial ways. In particular, this means all
301 the arithmetic is supported. */
302 static bool
303 spu_scalar_mode_supported_p (machine_mode mode)
305 switch (mode)
307 case QImode:
308 case HImode:
309 case SImode:
310 case SFmode:
311 case DImode:
312 case TImode:
313 case DFmode:
314 return true;
316 default:
317 return false;
321 /* Similarly for vector modes. "Supported" here is less strict. At
322 least some operations are supported; need to check optabs or builtins
323 for further details. */
324 static bool
325 spu_vector_mode_supported_p (machine_mode mode)
327 switch (mode)
329 case V16QImode:
330 case V8HImode:
331 case V4SImode:
332 case V2DImode:
333 case V4SFmode:
334 case V2DFmode:
335 return true;
337 default:
338 return false;
 342 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
 343    least significant bytes of the outer mode.  This function returns
 344    TRUE for the SUBREGs where this is correct. */
346 valid_subreg (rtx op)
348 machine_mode om = GET_MODE (op);
349 machine_mode im = GET_MODE (SUBREG_REG (op));
350 return om != VOIDmode && im != VOIDmode
351 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
352 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
353 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
 356 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
 357    and adjust the start offset. */
358 static rtx
359 adjust_operand (rtx op, HOST_WIDE_INT * start)
361 machine_mode mode;
362 int op_size;
363 /* Strip any paradoxical SUBREG. */
364 if (GET_CODE (op) == SUBREG
365 && (GET_MODE_BITSIZE (GET_MODE (op))
366 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
368 if (start)
369 *start -=
370 GET_MODE_BITSIZE (GET_MODE (op)) -
371 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
372 op = SUBREG_REG (op);
374 /* If it is smaller than SI, assure a SUBREG */
375 op_size = GET_MODE_BITSIZE (GET_MODE (op));
376 if (op_size < 32)
378 if (start)
379 *start += 32 - op_size;
380 op_size = 32;
382 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
383 mode = mode_for_size (op_size, MODE_INT, 0);
384 if (mode != GET_MODE (op))
385 op = gen_rtx_SUBREG (mode, op, 0);
386 return op;
389 void
390 spu_expand_extv (rtx ops[], int unsignedp)
392 rtx dst = ops[0], src = ops[1];
393 HOST_WIDE_INT width = INTVAL (ops[2]);
394 HOST_WIDE_INT start = INTVAL (ops[3]);
395 HOST_WIDE_INT align_mask;
396 rtx s0, s1, mask, r0;
398 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
400 if (MEM_P (src))
402 /* First, determine if we need 1 TImode load or 2. We need only 1
403 if the bits being extracted do not cross the alignment boundary
404 as determined by the MEM and its address. */
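/* Editorial example: with MEM_ALIGN (src) == 128 (bits), align_mask is
   -128, so one load suffices when START and START + WIDTH - 1 round down
   to the same multiple of 128; e.g. start == 120, width == 16 gives
   0 vs. 128 and takes the two-load path below.  */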
406 align_mask = -MEM_ALIGN (src);
407 if ((start & align_mask) == ((start + width - 1) & align_mask))
409 /* Alignment is sufficient for 1 load. */
410 s0 = gen_reg_rtx (TImode);
411 r0 = spu_expand_load (s0, 0, src, start / 8);
412 start &= 7;
413 if (r0)
414 emit_insn (gen_rotqby_ti (s0, s0, r0));
416 else
418 /* Need 2 loads. */
419 s0 = gen_reg_rtx (TImode);
420 s1 = gen_reg_rtx (TImode);
421 r0 = spu_expand_load (s0, s1, src, start / 8);
422 start &= 7;
424 gcc_assert (start + width <= 128);
425 if (r0)
427 rtx r1 = gen_reg_rtx (SImode);
428 mask = gen_reg_rtx (TImode);
429 emit_move_insn (mask, GEN_INT (-1));
430 emit_insn (gen_rotqby_ti (s0, s0, r0));
431 emit_insn (gen_rotqby_ti (s1, s1, r0));
432 if (GET_CODE (r0) == CONST_INT)
433 r1 = GEN_INT (INTVAL (r0) & 15);
434 else
435 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
436 emit_insn (gen_shlqby_ti (mask, mask, r1));
437 emit_insn (gen_selb (s0, s1, s0, mask));
442 else if (GET_CODE (src) == SUBREG)
444 rtx r = SUBREG_REG (src);
445 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
446 s0 = gen_reg_rtx (TImode);
447 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
448 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
449 else
450 emit_move_insn (s0, src);
452 else
454 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
455 s0 = gen_reg_rtx (TImode);
456 emit_move_insn (s0, src);
459 /* Now s0 is TImode and contains the bits to extract at start. */
461 if (start)
462 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
464 if (128 - width)
465 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
467 emit_move_insn (dst, s0);
470 void
471 spu_expand_insv (rtx ops[])
473 HOST_WIDE_INT width = INTVAL (ops[1]);
474 HOST_WIDE_INT start = INTVAL (ops[2]);
475 HOST_WIDE_INT maskbits;
476 machine_mode dst_mode;
477 rtx dst = ops[0], src = ops[3];
478 int dst_size;
479 rtx mask;
480 rtx shift_reg;
481 int shift;
484 if (GET_CODE (ops[0]) == MEM)
485 dst = gen_reg_rtx (TImode);
486 else
487 dst = adjust_operand (dst, &start);
488 dst_mode = GET_MODE (dst);
489 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
491 if (CONSTANT_P (src))
493 machine_mode m =
494 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
495 src = force_reg (m, convert_to_mode (m, src, 0));
497 src = adjust_operand (src, 0);
499 mask = gen_reg_rtx (dst_mode);
500 shift_reg = gen_reg_rtx (dst_mode);
501 shift = dst_size - start - width;
503 /* It's not safe to use subreg here because the compiler assumes
504 that the SUBREG_REG is right justified in the SUBREG. */
505 convert_move (shift_reg, src, 1);
507 if (shift > 0)
509 switch (dst_mode)
511 case SImode:
512 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
513 break;
514 case DImode:
515 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
516 break;
517 case TImode:
518 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
519 break;
520 default:
521 abort ();
524 else if (shift < 0)
525 abort ();
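/* Editorial note: the switch below builds a mask of WIDTH one-bits whose
   first (most significant) one lies START bits below the MSB of the
   destination.  E.g. dst_size == 32, start == 4, width == 8:
   (-1ll << 20) + (1ll << 28) == 0x0ff00000.  */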
527 switch (dst_size)
529 case 32:
530 maskbits = (-1ll << (32 - width - start));
531 if (start)
532 maskbits += (1ll << (32 - start));
533 emit_move_insn (mask, GEN_INT (maskbits));
534 break;
535 case 64:
536 maskbits = (-1ll << (64 - width - start));
537 if (start)
538 maskbits += (1ll << (64 - start));
539 emit_move_insn (mask, GEN_INT (maskbits));
540 break;
541 case 128:
543 unsigned char arr[16];
544 int i = start / 8;
545 memset (arr, 0, sizeof (arr));
546 arr[i] = 0xff >> (start & 7);
547 for (i++; i <= (start + width - 1) / 8; i++)
548 arr[i] = 0xff;
549 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
550 emit_move_insn (mask, array_to_constant (TImode, arr));
552 break;
553 default:
554 abort ();
556 if (GET_CODE (ops[0]) == MEM)
558 rtx low = gen_reg_rtx (SImode);
559 rtx rotl = gen_reg_rtx (SImode);
560 rtx mask0 = gen_reg_rtx (TImode);
561 rtx addr;
562 rtx addr0;
563 rtx addr1;
564 rtx mem;
566 addr = force_reg (Pmode, XEXP (ops[0], 0));
567 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
568 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
569 emit_insn (gen_negsi2 (rotl, low));
570 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
571 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
572 mem = change_address (ops[0], TImode, addr0);
573 set_mem_alias_set (mem, 0);
574 emit_move_insn (dst, mem);
575 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
576 if (start + width > MEM_ALIGN (ops[0]))
578 rtx shl = gen_reg_rtx (SImode);
579 rtx mask1 = gen_reg_rtx (TImode);
580 rtx dst1 = gen_reg_rtx (TImode);
581 rtx mem1;
582 addr1 = plus_constant (Pmode, addr, 16);
583 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
584 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
585 emit_insn (gen_shlqby_ti (mask1, mask, shl));
586 mem1 = change_address (ops[0], TImode, addr1);
587 set_mem_alias_set (mem1, 0);
588 emit_move_insn (dst1, mem1);
589 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
590 emit_move_insn (mem1, dst1);
592 emit_move_insn (mem, dst);
594 else
595 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
600 spu_expand_block_move (rtx ops[])
602 HOST_WIDE_INT bytes, align, offset;
603 rtx src, dst, sreg, dreg, target;
604 int i;
605 if (GET_CODE (ops[2]) != CONST_INT
606 || GET_CODE (ops[3]) != CONST_INT
607 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
608 return 0;
610 bytes = INTVAL (ops[2]);
611 align = INTVAL (ops[3]);
613 if (bytes <= 0)
614 return 1;
616 dst = ops[0];
617 src = ops[1];
619 if (align == 16)
621 for (offset = 0; offset + 16 <= bytes; offset += 16)
623 dst = adjust_address (ops[0], V16QImode, offset);
624 src = adjust_address (ops[1], V16QImode, offset);
625 emit_move_insn (dst, src);
627 if (offset < bytes)
629 rtx mask;
630 unsigned char arr[16] = { 0 };
631 for (i = 0; i < bytes - offset; i++)
632 arr[i] = 0xff;
633 dst = adjust_address (ops[0], V16QImode, offset);
634 src = adjust_address (ops[1], V16QImode, offset);
635 mask = gen_reg_rtx (V16QImode);
636 sreg = gen_reg_rtx (V16QImode);
637 dreg = gen_reg_rtx (V16QImode);
638 target = gen_reg_rtx (V16QImode);
639 emit_move_insn (mask, array_to_constant (V16QImode, arr));
640 emit_move_insn (dreg, dst);
641 emit_move_insn (sreg, src);
642 emit_insn (gen_selb (target, dreg, sreg, mask));
643 emit_move_insn (dst, target);
645 return 1;
647 return 0;
650 enum spu_comp_code
651 { SPU_EQ, SPU_GT, SPU_GTU };
653 int spu_comp_icode[12][3] = {
654 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
655 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
656 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
657 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
658 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
659 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
660 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
661 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
662 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
663 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
664 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
665 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
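/* Editorial note: rows of spu_comp_icode are indexed by operand mode
   (QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF, matching
   the index values chosen in spu_emit_branch_or_set below) and columns
   by spu_comp_code; the 0 entries mark modes with no unsigned (clgt)
   compare pattern.  */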
 668 /* Generate a compare for CODE.  Return a brand-new rtx that represents
 669    the result of the compare.  GCC can figure this out too if we don't
 670    provide all variations of compares, but since GCC always wants to use
 671    WORD_MODE, we can generate better code in most cases if we do it
 672    ourselves. */
673 void
674 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
676 int reverse_compare = 0;
677 int reverse_test = 0;
678 rtx compare_result, eq_result;
679 rtx comp_rtx, eq_rtx;
680 machine_mode comp_mode;
681 machine_mode op_mode;
682 enum spu_comp_code scode, eq_code;
683 enum insn_code ior_code;
684 enum rtx_code code = GET_CODE (cmp);
685 rtx op0 = XEXP (cmp, 0);
686 rtx op1 = XEXP (cmp, 1);
687 int index;
688 int eq_test = 0;
690 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
691 and so on, to keep the constant in operand 1. */
692 if (GET_CODE (op1) == CONST_INT)
694 HOST_WIDE_INT val = INTVAL (op1) - 1;
695 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
696 switch (code)
698 case GE:
699 op1 = GEN_INT (val);
700 code = GT;
701 break;
702 case LT:
703 op1 = GEN_INT (val);
704 code = LE;
705 break;
706 case GEU:
707 op1 = GEN_INT (val);
708 code = GTU;
709 break;
710 case LTU:
711 op1 = GEN_INT (val);
712 code = LEU;
713 break;
714 default:
715 break;
719 /* However, if we generate an integer result, performing a reverse test
720 would require an extra negation, so avoid that where possible. */
721 if (GET_CODE (op1) == CONST_INT && is_set == 1)
723 HOST_WIDE_INT val = INTVAL (op1) + 1;
724 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
725 switch (code)
727 case LE:
728 op1 = GEN_INT (val);
729 code = LT;
730 break;
731 case LEU:
732 op1 = GEN_INT (val);
733 code = LTU;
734 break;
735 default:
736 break;
740 comp_mode = SImode;
741 op_mode = GET_MODE (op0);
743 switch (code)
745 case GE:
746 scode = SPU_GT;
747 if (HONOR_NANS (op_mode))
749 reverse_compare = 0;
750 reverse_test = 0;
751 eq_test = 1;
752 eq_code = SPU_EQ;
754 else
756 reverse_compare = 1;
757 reverse_test = 1;
759 break;
760 case LE:
761 scode = SPU_GT;
762 if (HONOR_NANS (op_mode))
764 reverse_compare = 1;
765 reverse_test = 0;
766 eq_test = 1;
767 eq_code = SPU_EQ;
769 else
771 reverse_compare = 0;
772 reverse_test = 1;
774 break;
775 case LT:
776 reverse_compare = 1;
777 reverse_test = 0;
778 scode = SPU_GT;
779 break;
780 case GEU:
781 reverse_compare = 1;
782 reverse_test = 1;
783 scode = SPU_GTU;
784 break;
785 case LEU:
786 reverse_compare = 0;
787 reverse_test = 1;
788 scode = SPU_GTU;
789 break;
790 case LTU:
791 reverse_compare = 1;
792 reverse_test = 0;
793 scode = SPU_GTU;
794 break;
795 case NE:
796 reverse_compare = 0;
797 reverse_test = 1;
798 scode = SPU_EQ;
799 break;
801 case EQ:
802 scode = SPU_EQ;
803 break;
804 case GT:
805 scode = SPU_GT;
806 break;
807 case GTU:
808 scode = SPU_GTU;
809 break;
810 default:
811 scode = SPU_EQ;
812 break;
815 switch (op_mode)
817 case QImode:
818 index = 0;
819 comp_mode = QImode;
820 break;
821 case HImode:
822 index = 1;
823 comp_mode = HImode;
824 break;
825 case SImode:
826 index = 2;
827 break;
828 case DImode:
829 index = 3;
830 break;
831 case TImode:
832 index = 4;
833 break;
834 case SFmode:
835 index = 5;
836 break;
837 case DFmode:
838 index = 6;
839 break;
840 case V16QImode:
841 index = 7;
842 comp_mode = op_mode;
843 break;
844 case V8HImode:
845 index = 8;
846 comp_mode = op_mode;
847 break;
848 case V4SImode:
849 index = 9;
850 comp_mode = op_mode;
851 break;
852 case V4SFmode:
853 index = 10;
854 comp_mode = V4SImode;
855 break;
856 case V2DFmode:
857 index = 11;
858 comp_mode = V2DImode;
859 break;
860 case V2DImode:
861 default:
862 abort ();
865 if (GET_MODE (op1) == DFmode
866 && (scode != SPU_GT && scode != SPU_EQ))
867 abort ();
869 if (is_set == 0 && op1 == const0_rtx
870 && (GET_MODE (op0) == SImode
871 || GET_MODE (op0) == HImode
872 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
874 /* Don't need to set a register with the result when we are
875 comparing against zero and branching. */
876 reverse_test = !reverse_test;
877 compare_result = op0;
879 else
881 compare_result = gen_reg_rtx (comp_mode);
883 if (reverse_compare)
885 rtx t = op1;
886 op1 = op0;
887 op0 = t;
890 if (spu_comp_icode[index][scode] == 0)
891 abort ();
893 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
894 (op0, op_mode))
895 op0 = force_reg (op_mode, op0);
896 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
897 (op1, op_mode))
898 op1 = force_reg (op_mode, op1);
899 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
900 op0, op1);
901 if (comp_rtx == 0)
902 abort ();
903 emit_insn (comp_rtx);
905 if (eq_test)
907 eq_result = gen_reg_rtx (comp_mode);
908 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
909 op0, op1);
910 if (eq_rtx == 0)
911 abort ();
912 emit_insn (eq_rtx);
913 ior_code = optab_handler (ior_optab, comp_mode);
914 gcc_assert (ior_code != CODE_FOR_nothing);
915 emit_insn (GEN_FCN (ior_code)
916 (compare_result, compare_result, eq_result));
920 if (is_set == 0)
922 rtx bcomp;
923 rtx loc_ref;
925 /* We don't have branch on QI compare insns, so we convert the
926 QI compare result to a HI result. */
927 if (comp_mode == QImode)
929 rtx old_res = compare_result;
930 compare_result = gen_reg_rtx (HImode);
931 comp_mode = HImode;
932 emit_insn (gen_extendqihi2 (compare_result, old_res));
935 if (reverse_test)
936 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
937 else
938 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
940 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
941 emit_jump_insn (gen_rtx_SET (pc_rtx,
942 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
943 loc_ref, pc_rtx)));
945 else if (is_set == 2)
947 rtx target = operands[0];
948 int compare_size = GET_MODE_BITSIZE (comp_mode);
949 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
950 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
951 rtx select_mask;
952 rtx op_t = operands[2];
953 rtx op_f = operands[3];
955 /* The result of the comparison can be SI, HI or QI mode. Create a
956 mask based on that result. */
957 if (target_size > compare_size)
959 select_mask = gen_reg_rtx (mode);
960 emit_insn (gen_extend_compare (select_mask, compare_result));
962 else if (target_size < compare_size)
963 select_mask =
964 gen_rtx_SUBREG (mode, compare_result,
965 (compare_size - target_size) / BITS_PER_UNIT);
966 else if (comp_mode != mode)
967 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
968 else
969 select_mask = compare_result;
971 if (GET_MODE (target) != GET_MODE (op_t)
972 || GET_MODE (target) != GET_MODE (op_f))
973 abort ();
975 if (reverse_test)
976 emit_insn (gen_selb (target, op_t, op_f, select_mask));
977 else
978 emit_insn (gen_selb (target, op_f, op_t, select_mask));
980 else
982 rtx target = operands[0];
983 if (reverse_test)
984 emit_insn (gen_rtx_SET (compare_result,
985 gen_rtx_NOT (comp_mode, compare_result)));
986 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
987 emit_insn (gen_extendhisi2 (target, compare_result));
988 else if (GET_MODE (target) == SImode
989 && GET_MODE (compare_result) == QImode)
990 emit_insn (gen_extend_compare (target, compare_result));
991 else
992 emit_move_insn (target, compare_result);
996 HOST_WIDE_INT
997 const_double_to_hwint (rtx x)
999 HOST_WIDE_INT val;
1000 REAL_VALUE_TYPE rv;
1001 if (GET_MODE (x) == SFmode)
1003 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1004 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1006 else if (GET_MODE (x) == DFmode)
1008 long l[2];
1009 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1010 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1011 val = l[0];
1012 val = (val << 32) | (l[1] & 0xffffffff);
1014 else
1015 abort ();
1016 return val;
1020 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1022 long tv[2];
1023 REAL_VALUE_TYPE rv;
1024 gcc_assert (mode == SFmode || mode == DFmode);
1026 if (mode == SFmode)
1027 tv[0] = (v << 32) >> 32;
1028 else if (mode == DFmode)
1030 tv[1] = (v << 32) >> 32;
1031 tv[0] = v >> 32;
1033 real_from_target (&rv, tv, mode);
1034 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1037 void
1038 print_operand_address (FILE * file, register rtx addr)
1040 rtx reg;
1041 rtx offset;
1043 if (GET_CODE (addr) == AND
1044 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1045 && INTVAL (XEXP (addr, 1)) == -16)
1046 addr = XEXP (addr, 0);
1048 switch (GET_CODE (addr))
1050 case REG:
1051 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1052 break;
1054 case PLUS:
1055 reg = XEXP (addr, 0);
1056 offset = XEXP (addr, 1);
1057 if (GET_CODE (offset) == REG)
1059 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1060 reg_names[REGNO (offset)]);
1062 else if (GET_CODE (offset) == CONST_INT)
1064 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1065 INTVAL (offset), reg_names[REGNO (reg)]);
1067 else
1068 abort ();
1069 break;
1071 case CONST:
1072 case LABEL_REF:
1073 case SYMBOL_REF:
1074 case CONST_INT:
1075 output_addr_const (file, addr);
1076 break;
1078 default:
1079 debug_rtx (addr);
1080 abort ();
1084 void
1085 print_operand (FILE * file, rtx x, int code)
1087 machine_mode mode = GET_MODE (x);
1088 HOST_WIDE_INT val;
1089 unsigned char arr[16];
1090 int xcode = GET_CODE (x);
1091 int i, info;
1092 if (GET_MODE (x) == VOIDmode)
1093 switch (code)
1095 case 'L': /* 128 bits, signed */
1096 case 'm': /* 128 bits, signed */
1097 case 'T': /* 128 bits, signed */
1098 case 't': /* 128 bits, signed */
1099 mode = TImode;
1100 break;
1101 case 'K': /* 64 bits, signed */
1102 case 'k': /* 64 bits, signed */
1103 case 'D': /* 64 bits, signed */
1104 case 'd': /* 64 bits, signed */
1105 mode = DImode;
1106 break;
1107 case 'J': /* 32 bits, signed */
1108 case 'j': /* 32 bits, signed */
1109 case 's': /* 32 bits, signed */
1110 case 'S': /* 32 bits, signed */
1111 mode = SImode;
1112 break;
1114 switch (code)
1117 case 'j': /* 32 bits, signed */
1118 case 'k': /* 64 bits, signed */
1119 case 'm': /* 128 bits, signed */
1120 if (xcode == CONST_INT
1121 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1123 gcc_assert (logical_immediate_p (x, mode));
1124 constant_to_array (mode, x, arr);
1125 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1126 val = trunc_int_for_mode (val, SImode);
1127 switch (which_logical_immediate (val))
1129 case SPU_ORI:
1130 break;
1131 case SPU_ORHI:
1132 fprintf (file, "h");
1133 break;
1134 case SPU_ORBI:
1135 fprintf (file, "b");
1136 break;
1137 default:
1138 gcc_unreachable();
1141 else
1142 gcc_unreachable();
1143 return;
1145 case 'J': /* 32 bits, signed */
1146 case 'K': /* 64 bits, signed */
1147 case 'L': /* 128 bits, signed */
1148 if (xcode == CONST_INT
1149 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1151 gcc_assert (logical_immediate_p (x, mode)
1152 || iohl_immediate_p (x, mode));
1153 constant_to_array (mode, x, arr);
1154 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1155 val = trunc_int_for_mode (val, SImode);
1156 switch (which_logical_immediate (val))
1158 case SPU_ORI:
1159 case SPU_IOHL:
1160 break;
1161 case SPU_ORHI:
1162 val = trunc_int_for_mode (val, HImode);
1163 break;
1164 case SPU_ORBI:
1165 val = trunc_int_for_mode (val, QImode);
1166 break;
1167 default:
1168 gcc_unreachable();
1170 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1172 else
1173 gcc_unreachable();
1174 return;
1176 case 't': /* 128 bits, signed */
1177 case 'd': /* 64 bits, signed */
1178 case 's': /* 32 bits, signed */
1179 if (CONSTANT_P (x))
1181 enum immediate_class c = classify_immediate (x, mode);
1182 switch (c)
1184 case IC_IL1:
1185 constant_to_array (mode, x, arr);
1186 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1187 val = trunc_int_for_mode (val, SImode);
1188 switch (which_immediate_load (val))
1190 case SPU_IL:
1191 break;
1192 case SPU_ILA:
1193 fprintf (file, "a");
1194 break;
1195 case SPU_ILH:
1196 fprintf (file, "h");
1197 break;
1198 case SPU_ILHU:
1199 fprintf (file, "hu");
1200 break;
1201 default:
1202 gcc_unreachable ();
1204 break;
1205 case IC_CPAT:
1206 constant_to_array (mode, x, arr);
1207 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1208 if (info == 1)
1209 fprintf (file, "b");
1210 else if (info == 2)
1211 fprintf (file, "h");
1212 else if (info == 4)
1213 fprintf (file, "w");
1214 else if (info == 8)
1215 fprintf (file, "d");
1216 break;
1217 case IC_IL1s:
1218 if (xcode == CONST_VECTOR)
1220 x = CONST_VECTOR_ELT (x, 0);
1221 xcode = GET_CODE (x);
1223 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1224 fprintf (file, "a");
1225 else if (xcode == HIGH)
1226 fprintf (file, "hu");
1227 break;
1228 case IC_FSMBI:
1229 case IC_FSMBI2:
1230 case IC_IL2:
1231 case IC_IL2s:
1232 case IC_POOL:
1233 abort ();
1236 else
1237 gcc_unreachable ();
1238 return;
1240 case 'T': /* 128 bits, signed */
1241 case 'D': /* 64 bits, signed */
1242 case 'S': /* 32 bits, signed */
1243 if (CONSTANT_P (x))
1245 enum immediate_class c = classify_immediate (x, mode);
1246 switch (c)
1248 case IC_IL1:
1249 constant_to_array (mode, x, arr);
1250 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1251 val = trunc_int_for_mode (val, SImode);
1252 switch (which_immediate_load (val))
1254 case SPU_IL:
1255 case SPU_ILA:
1256 break;
1257 case SPU_ILH:
1258 case SPU_ILHU:
1259 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1260 break;
1261 default:
1262 gcc_unreachable ();
1264 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1265 break;
1266 case IC_FSMBI:
1267 constant_to_array (mode, x, arr);
1268 val = 0;
1269 for (i = 0; i < 16; i++)
1271 val <<= 1;
1272 val |= arr[i] & 1;
1274 print_operand (file, GEN_INT (val), 0);
1275 break;
1276 case IC_CPAT:
1277 constant_to_array (mode, x, arr);
1278 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1279 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1280 break;
1281 case IC_IL1s:
1282 if (xcode == HIGH)
1283 x = XEXP (x, 0);
1284 if (GET_CODE (x) == CONST_VECTOR)
1285 x = CONST_VECTOR_ELT (x, 0);
1286 output_addr_const (file, x);
1287 if (xcode == HIGH)
1288 fprintf (file, "@h");
1289 break;
1290 case IC_IL2:
1291 case IC_IL2s:
1292 case IC_FSMBI2:
1293 case IC_POOL:
1294 abort ();
1297 else
1298 gcc_unreachable ();
1299 return;
1301 case 'C':
1302 if (xcode == CONST_INT)
 1304 /* Only the 4 least significant bits are relevant for generating
 1305    control word instructions. */
1306 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1307 return;
1309 break;
1311 case 'M': /* print code for c*d */
1312 if (GET_CODE (x) == CONST_INT)
1313 switch (INTVAL (x))
1315 case 1:
1316 fprintf (file, "b");
1317 break;
1318 case 2:
1319 fprintf (file, "h");
1320 break;
1321 case 4:
1322 fprintf (file, "w");
1323 break;
1324 case 8:
1325 fprintf (file, "d");
1326 break;
1327 default:
1328 gcc_unreachable();
1330 else
1331 gcc_unreachable();
1332 return;
1334 case 'N': /* Negate the operand */
1335 if (xcode == CONST_INT)
1336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1337 else if (xcode == CONST_VECTOR)
1338 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1339 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1340 return;
1342 case 'I': /* enable/disable interrupts */
1343 if (xcode == CONST_INT)
1344 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1345 return;
1347 case 'b': /* branch modifiers */
1348 if (xcode == REG)
1349 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1350 else if (COMPARISON_P (x))
1351 fprintf (file, "%s", xcode == NE ? "n" : "");
1352 return;
1354 case 'i': /* indirect call */
1355 if (xcode == MEM)
1357 if (GET_CODE (XEXP (x, 0)) == REG)
1358 /* Used in indirect function calls. */
1359 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1360 else
1361 output_address (XEXP (x, 0));
1363 return;
1365 case 'p': /* load/store */
1366 if (xcode == MEM)
1368 x = XEXP (x, 0);
1369 xcode = GET_CODE (x);
1371 if (xcode == AND)
1373 x = XEXP (x, 0);
1374 xcode = GET_CODE (x);
1376 if (xcode == REG)
1377 fprintf (file, "d");
1378 else if (xcode == CONST_INT)
1379 fprintf (file, "a");
1380 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1381 fprintf (file, "r");
1382 else if (xcode == PLUS || xcode == LO_SUM)
1384 if (GET_CODE (XEXP (x, 1)) == REG)
1385 fprintf (file, "x");
1386 else
1387 fprintf (file, "d");
1389 return;
1391 case 'e':
1392 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1393 val &= 0x7;
1394 output_addr_const (file, GEN_INT (val));
1395 return;
1397 case 'f':
1398 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1399 val &= 0x1f;
1400 output_addr_const (file, GEN_INT (val));
1401 return;
1403 case 'g':
1404 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1405 val &= 0x3f;
1406 output_addr_const (file, GEN_INT (val));
1407 return;
1409 case 'h':
1410 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1411 val = (val >> 3) & 0x1f;
1412 output_addr_const (file, GEN_INT (val));
1413 return;
1415 case 'E':
1416 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1417 val = -val;
1418 val &= 0x7;
1419 output_addr_const (file, GEN_INT (val));
1420 return;
1422 case 'F':
1423 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1424 val = -val;
1425 val &= 0x1f;
1426 output_addr_const (file, GEN_INT (val));
1427 return;
1429 case 'G':
1430 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1431 val = -val;
1432 val &= 0x3f;
1433 output_addr_const (file, GEN_INT (val));
1434 return;
1436 case 'H':
1437 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1438 val = -(val & -8ll);
1439 val = (val >> 3) & 0x1f;
1440 output_addr_const (file, GEN_INT (val));
1441 return;
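/* Editorial note: the 'v' and 'w' cases below print the unbiased
   exponent of the leading 32-bit element: bits 1..8 of the constant's
   byte image (the IEEE single-precision exponent position) are
   extracted and 127 is subtracted to remove the bias; 'w' negates the
   result.  */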
1443 case 'v':
1444 case 'w':
1445 constant_to_array (mode, x, arr);
1446 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1447 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1448 return;
1450 case 0:
1451 if (xcode == REG)
1452 fprintf (file, "%s", reg_names[REGNO (x)]);
1453 else if (xcode == MEM)
1454 output_address (XEXP (x, 0));
1455 else if (xcode == CONST_VECTOR)
1456 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1457 else
1458 output_addr_const (file, x);
1459 return;
1461 /* unused letters
1462 o qr u yz
1463 AB OPQR UVWXYZ */
1464 default:
1465 output_operand_lossage ("invalid %%xn code");
1467 gcc_unreachable ();
1470 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1471 caller saved register. For leaf functions it is more efficient to
1472 use a volatile register because we won't need to save and restore the
1473 pic register. This routine is only valid after register allocation
1474 is completed, so we can pick an unused register. */
1475 static rtx
1476 get_pic_reg (void)
1478 if (!reload_completed && !reload_in_progress)
1479 abort ();
1481 /* If we've already made the decision, we need to keep with it. Once we've
1482 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1483 return true since the register is now live; this should not cause us to
1484 "switch back" to using pic_offset_table_rtx. */
1485 if (!cfun->machine->pic_reg)
1487 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1488 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1489 else
1490 cfun->machine->pic_reg = pic_offset_table_rtx;
1493 return cfun->machine->pic_reg;
1496 /* Split constant addresses to handle cases that are too large.
1497 Add in the pic register when in PIC mode.
1498 Split immediates that require more than 1 instruction. */
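/* Editorial example for the IC_IL2 case below: a constant word such as
   0x12345678 is split into a high part 0x12340000 (loaded with ilhu)
   and a low part 0x00005678 merged in with iohl, expressed here as a
   move followed by an IOR.  */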
1500 spu_split_immediate (rtx * ops)
1502 machine_mode mode = GET_MODE (ops[0]);
1503 enum immediate_class c = classify_immediate (ops[1], mode);
1505 switch (c)
1507 case IC_IL2:
1509 unsigned char arrhi[16];
1510 unsigned char arrlo[16];
1511 rtx to, temp, hi, lo;
1512 int i;
1513 machine_mode imode = mode;
1514 /* We need to do reals as ints because the constant used in the
1515 IOR might not be a legitimate real constant. */
1516 imode = int_mode_for_mode (mode);
1517 constant_to_array (mode, ops[1], arrhi);
1518 if (imode != mode)
1519 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1520 else
1521 to = ops[0];
1522 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1523 for (i = 0; i < 16; i += 4)
1525 arrlo[i + 2] = arrhi[i + 2];
1526 arrlo[i + 3] = arrhi[i + 3];
1527 arrlo[i + 0] = arrlo[i + 1] = 0;
1528 arrhi[i + 2] = arrhi[i + 3] = 0;
1530 hi = array_to_constant (imode, arrhi);
1531 lo = array_to_constant (imode, arrlo);
1532 emit_move_insn (temp, hi);
1533 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1534 return 1;
1536 case IC_FSMBI2:
1538 unsigned char arr_fsmbi[16];
1539 unsigned char arr_andbi[16];
1540 rtx to, reg_fsmbi, reg_and;
1541 int i;
1542 machine_mode imode = mode;
1543 /* We need to do reals as ints because the constant used in the
1544 * AND might not be a legitimate real constant. */
1545 imode = int_mode_for_mode (mode);
1546 constant_to_array (mode, ops[1], arr_fsmbi);
1547 if (imode != mode)
1548 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1549 else
1550 to = ops[0];
1551 for (i = 0; i < 16; i++)
1552 if (arr_fsmbi[i] != 0)
1554 arr_andbi[0] = arr_fsmbi[i];
1555 arr_fsmbi[i] = 0xff;
1557 for (i = 1; i < 16; i++)
1558 arr_andbi[i] = arr_andbi[0];
1559 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1560 reg_and = array_to_constant (imode, arr_andbi);
1561 emit_move_insn (to, reg_fsmbi);
1562 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1563 return 1;
1565 case IC_POOL:
1566 if (reload_in_progress || reload_completed)
1568 rtx mem = force_const_mem (mode, ops[1]);
1569 if (TARGET_LARGE_MEM)
1571 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1572 emit_move_insn (addr, XEXP (mem, 0));
1573 mem = replace_equiv_address (mem, addr);
1575 emit_move_insn (ops[0], mem);
1576 return 1;
1578 break;
1579 case IC_IL1s:
1580 case IC_IL2s:
1581 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1583 if (c == IC_IL2s)
1585 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1586 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1588 else if (flag_pic)
1589 emit_insn (gen_pic (ops[0], ops[1]));
1590 if (flag_pic)
1592 rtx pic_reg = get_pic_reg ();
1593 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1595 return flag_pic || c == IC_IL2s;
1597 break;
1598 case IC_IL1:
1599 case IC_FSMBI:
1600 case IC_CPAT:
1601 break;
1603 return 0;
1606 /* SAVING is TRUE when we are generating the actual load and store
1607 instructions for REGNO. When determining the size of the stack
 1608    needed for saving registers we must allocate enough space for the
1609 worst case, because we don't always have the information early enough
1610 to not allocate it. But we can at least eliminate the actual loads
1611 and stores during the prologue/epilogue. */
1612 static int
1613 need_to_save_reg (int regno, int saving)
1615 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1616 return 1;
1617 if (flag_pic
1618 && regno == PIC_OFFSET_TABLE_REGNUM
1619 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1620 return 1;
1621 return 0;
1624 /* This function is only correct starting with local register
1625 allocation */
1627 spu_saved_regs_size (void)
1629 int reg_save_size = 0;
1630 int regno;
1632 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1633 if (need_to_save_reg (regno, 0))
1634 reg_save_size += 0x10;
1635 return reg_save_size;
1638 static rtx_insn *
1639 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1641 rtx reg = gen_rtx_REG (V4SImode, regno);
1642 rtx mem =
1643 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1644 return emit_insn (gen_movv4si (mem, reg));
1647 static rtx_insn *
1648 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1650 rtx reg = gen_rtx_REG (V4SImode, regno);
1651 rtx mem =
1652 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1653 return emit_insn (gen_movv4si (reg, mem));
1656 /* This happens after reload, so we need to expand it. */
1657 static rtx_insn *
1658 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1660 rtx_insn *insn;
1661 if (satisfies_constraint_K (GEN_INT (imm)))
1663 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1665 else
1667 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1668 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1669 if (REGNO (src) == REGNO (scratch))
1670 abort ();
1672 return insn;
1675 /* Return nonzero if this function is known to have a null epilogue. */
1678 direct_return (void)
1680 if (reload_completed)
1682 if (cfun->static_chain_decl == 0
1683 && (spu_saved_regs_size ()
1684 + get_frame_size ()
1685 + crtl->outgoing_args_size
1686 + crtl->args.pretend_args_size == 0)
1687 && crtl->is_leaf)
1688 return 1;
1690 return 0;
1694 The stack frame looks like this:
1695 +-------------+
1696 | incoming |
1697 | args |
1698 AP -> +-------------+
1699 | $lr save |
1700 +-------------+
1701 prev SP | back chain |
1702 +-------------+
1703 | var args |
1704 | reg save | crtl->args.pretend_args_size bytes
1705 +-------------+
1706 | ... |
1707 | saved regs | spu_saved_regs_size() bytes
1708 FP -> +-------------+
1709 | ... |
1710 | vars | get_frame_size() bytes
1711 HFP -> +-------------+
1712 | ... |
1713 | outgoing |
1714 | args | crtl->outgoing_args_size bytes
1715 +-------------+
1716 | $lr of next |
1717 | frame |
1718 +-------------+
1719 | back chain |
1720 SP -> +-------------+
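/* Editorial note: in the prologue below, total_size =
   get_frame_size () + spu_saved_regs_size ()
   + crtl->outgoing_args_size + crtl->args.pretend_args_size,
   plus STACK_POINTER_OFFSET whenever the function is not a leaf,
   calls alloca, or already has a nonzero frame.  */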
1723 void
1724 spu_expand_prologue (void)
1726 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1727 HOST_WIDE_INT total_size;
1728 HOST_WIDE_INT saved_regs_size;
1729 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1730 rtx scratch_reg_0, scratch_reg_1;
1731 rtx_insn *insn;
1732 rtx real;
1734 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1735 cfun->machine->pic_reg = pic_offset_table_rtx;
1737 if (spu_naked_function_p (current_function_decl))
1738 return;
1740 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1741 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1743 saved_regs_size = spu_saved_regs_size ();
1744 total_size = size + saved_regs_size
1745 + crtl->outgoing_args_size
1746 + crtl->args.pretend_args_size;
1748 if (!crtl->is_leaf
1749 || cfun->calls_alloca || total_size > 0)
1750 total_size += STACK_POINTER_OFFSET;
1752 /* Save this first because code after this might use the link
1753 register as a scratch register. */
1754 if (!crtl->is_leaf)
1756 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1757 RTX_FRAME_RELATED_P (insn) = 1;
1760 if (total_size > 0)
1762 offset = -crtl->args.pretend_args_size;
1763 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1764 if (need_to_save_reg (regno, 1))
1766 offset -= 16;
1767 insn = frame_emit_store (regno, sp_reg, offset);
1768 RTX_FRAME_RELATED_P (insn) = 1;
1772 if (flag_pic && cfun->machine->pic_reg)
1774 rtx pic_reg = cfun->machine->pic_reg;
1775 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1776 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1779 if (total_size > 0)
1781 if (flag_stack_check)
1783 /* We compare against total_size-1 because
1784 ($sp >= total_size) <=> ($sp > total_size-1) */
1785 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1786 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1787 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1788 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1790 emit_move_insn (scratch_v4si, size_v4si);
1791 size_v4si = scratch_v4si;
1793 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1794 emit_insn (gen_vec_extractv4si
1795 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1796 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1799 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1800 the value of the previous $sp because we save it as the back
1801 chain. */
1802 if (total_size <= 2000)
1804 /* In this case we save the back chain first. */
1805 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1806 insn =
1807 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1809 else
1811 insn = emit_move_insn (scratch_reg_0, sp_reg);
1812 insn =
1813 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1815 RTX_FRAME_RELATED_P (insn) = 1;
1816 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1817 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1819 if (total_size > 2000)
1821 /* Save the back chain ptr */
1822 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1825 if (frame_pointer_needed)
1827 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1828 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1829 + crtl->outgoing_args_size;
1830 /* Set the new frame_pointer */
1831 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1832 RTX_FRAME_RELATED_P (insn) = 1;
1833 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1834 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1835 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1839 if (flag_stack_usage_info)
1840 current_function_static_stack_size = total_size;
1843 void
1844 spu_expand_epilogue (bool sibcall_p)
1846 int size = get_frame_size (), offset, regno;
1847 HOST_WIDE_INT saved_regs_size, total_size;
1848 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1849 rtx scratch_reg_0;
1851 if (spu_naked_function_p (current_function_decl))
1852 return;
1854 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1856 saved_regs_size = spu_saved_regs_size ();
1857 total_size = size + saved_regs_size
1858 + crtl->outgoing_args_size
1859 + crtl->args.pretend_args_size;
1861 if (!crtl->is_leaf
1862 || cfun->calls_alloca || total_size > 0)
1863 total_size += STACK_POINTER_OFFSET;
1865 if (total_size > 0)
1867 if (cfun->calls_alloca)
1868 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1869 else
1870 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1873 if (saved_regs_size > 0)
1875 offset = -crtl->args.pretend_args_size;
1876 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1877 if (need_to_save_reg (regno, 1))
1879 offset -= 0x10;
1880 frame_emit_load (regno, sp_reg, offset);
1885 if (!crtl->is_leaf)
1886 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1888 if (!sibcall_p)
1890 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1891 emit_jump_insn (gen__return ());
1896 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1898 if (count != 0)
1899 return 0;
1900 /* This is inefficient because it ends up copying to a save-register
1901 which then gets saved even though $lr has already been saved. But
1902 it does generate better code for leaf functions and we don't need
1903 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1904 used for __builtin_return_address anyway, so maybe we don't care if
1905 it's inefficient. */
1906 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1910 /* Given VAL, generate a constant appropriate for MODE.
1911 If MODE is a vector mode, every element will be VAL.
1912 For TImode, VAL will be zero extended to 128 bits. */
1914 spu_const (machine_mode mode, HOST_WIDE_INT val)
1916 rtx inner;
1917 rtvec v;
1918 int units, i;
1920 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1921 || GET_MODE_CLASS (mode) == MODE_FLOAT
1922 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1923 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1925 if (GET_MODE_CLASS (mode) == MODE_INT)
1926 return immed_double_const (val, 0, mode);
1928 /* val is the bit representation of the float */
1929 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1930 return hwint_to_const_double (mode, val);
1932 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1933 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1934 else
1935 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1937 units = GET_MODE_NUNITS (mode);
1939 v = rtvec_alloc (units);
1941 for (i = 0; i < units; ++i)
1942 RTVEC_ELT (v, i) = inner;
1944 return gen_rtx_CONST_VECTOR (mode, v);
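/* Editorial usage note: spu_const (V4SImode, 1) yields the CONST_VECTOR
   {1, 1, 1, 1}; for float modes VAL is interpreted as the bit image of
   each element, per the comment above.  */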
1947 /* Create a MODE vector constant from 4 ints. */
1949 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1951 unsigned char arr[16];
1952 arr[0] = (a >> 24) & 0xff;
1953 arr[1] = (a >> 16) & 0xff;
1954 arr[2] = (a >> 8) & 0xff;
1955 arr[3] = (a >> 0) & 0xff;
1956 arr[4] = (b >> 24) & 0xff;
1957 arr[5] = (b >> 16) & 0xff;
1958 arr[6] = (b >> 8) & 0xff;
1959 arr[7] = (b >> 0) & 0xff;
1960 arr[8] = (c >> 24) & 0xff;
1961 arr[9] = (c >> 16) & 0xff;
1962 arr[10] = (c >> 8) & 0xff;
1963 arr[11] = (c >> 0) & 0xff;
1964 arr[12] = (d >> 24) & 0xff;
1965 arr[13] = (d >> 16) & 0xff;
1966 arr[14] = (d >> 8) & 0xff;
1967 arr[15] = (d >> 0) & 0xff;
1968 return array_to_constant(mode, arr);
 1971 /* Branch hint support.  */
1973 /* An array of these is used to propagate hints to predecessor blocks. */
1974 struct spu_bb_info
1976 rtx_insn *prop_jump; /* propagated from another block */
1977 int bb_index; /* the original block. */
1979 static struct spu_bb_info *spu_bb_info;
1981 #define STOP_HINT_P(INSN) \
1982 (CALL_P(INSN) \
1983 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1984 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1986 /* 1 when RTX is a hinted branch or its target. We keep track of
1987 what has been hinted so the safe-hint code can test it easily. */
1988 #define HINTED_P(RTX) \
1989 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1991 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1992 #define SCHED_ON_EVEN_P(RTX) \
1993 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1995 /* Emit a nop for INSN such that the two will dual issue. This assumes
1996 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1997 We check for TImode to handle a MULTI1 insn which has dual issued its
1998 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1999 static void
2000 emit_nop_for_insn (rtx_insn *insn)
2002 int p;
2003 rtx_insn *new_insn;
2005 /* We need to handle JUMP_TABLE_DATA separately. */
2006 if (JUMP_TABLE_DATA_P (insn))
2008 new_insn = emit_insn_after (gen_lnop(), insn);
2009 recog_memoized (new_insn);
2010 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
2011 return;
2014 p = get_pipe (insn);
2015 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2016 new_insn = emit_insn_after (gen_lnop (), insn);
2017 else if (p == 1 && GET_MODE (insn) == TImode)
2019 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2020 PUT_MODE (new_insn, TImode);
2021 PUT_MODE (insn, VOIDmode);
2023 else
2024 new_insn = emit_insn_after (gen_lnop (), insn);
2025 recog_memoized (new_insn);
2026 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2029 /* Insert nops in basic blocks to meet dual issue alignment
2030 requirements. Also make sure hbrp and hint instructions are at least
2031 one cycle apart, possibly inserting a nop. */
2032 static void
2033 pad_bb(void)
2035 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2036 int length;
2037 int addr;
2039 /* This sets up INSN_ADDRESSES. */
2040 shorten_branches (get_insns ());
2042 /* Keep track of length added by nops. */
2043 length = 0;
2045 prev_insn = 0;
2046 insn = get_insns ();
2047 if (!active_insn_p (insn))
2048 insn = next_active_insn (insn);
2049 for (; insn; insn = next_insn)
2051 next_insn = next_active_insn (insn);
2052 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2053 || INSN_CODE (insn) == CODE_FOR_hbr)
2055 if (hbr_insn)
2057 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2058 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2059 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2060 || (a1 - a0 == 4))
2062 prev_insn = emit_insn_before (gen_lnop (), insn);
2063 PUT_MODE (prev_insn, GET_MODE (insn));
2064 PUT_MODE (insn, TImode);
2065 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2066 length += 4;
2069 hbr_insn = insn;
2071 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2073 if (GET_MODE (insn) == TImode)
2074 PUT_MODE (next_insn, TImode);
2075 insn = next_insn;
2076 next_insn = next_active_insn (insn);
2078 addr = INSN_ADDRESSES (INSN_UID (insn));
2079 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2081 if (((addr + length) & 7) != 0)
2083 emit_nop_for_insn (prev_insn);
2084 length += 4;
2087 else if (GET_MODE (insn) == TImode
2088 && ((next_insn && GET_MODE (next_insn) != TImode)
2089 || get_attr_type (insn) == TYPE_MULTI0)
2090 && ((addr + length) & 7) != 0)
2092 /* prev_insn will always be set because the first insn is
2093 always 8-byte aligned. */
2094 emit_nop_for_insn (prev_insn);
2095 length += 4;
2097 prev_insn = insn;
2102 /* Routines for branch hints. */
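/* Editorial summary of the distance handling in spu_emit_branch_hint:
   hints more than 600 bytes from the branch are not emitted at all; at
   588 bytes or more a blockage keeps the hint from being scheduled any
   earlier; within 32 bytes (8 insns) nops are inserted after the hint
   so at least 8 insns separate the hint from the branch.  */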
2104 static void
2105 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2106 int distance, sbitmap blocks)
2108 rtx branch_label = 0;
2109 rtx_insn *hint;
2110 rtx_insn *insn;
2111 rtx_jump_table_data *table;
2113 if (before == 0 || branch == 0 || target == 0)
2114 return;
2116 /* While scheduling we require hints to be no further than 600, so
2117 we need to enforce that here too */
2118 if (distance > 600)
2119 return;
 2121 /* If BEFORE is a basic block note, emit the hint after the note. */
2122 if (NOTE_INSN_BASIC_BLOCK_P (before))
2123 before = NEXT_INSN (before);
2125 branch_label = gen_label_rtx ();
2126 LABEL_NUSES (branch_label)++;
2127 LABEL_PRESERVE_P (branch_label) = 1;
2128 insn = emit_label_before (branch_label, branch);
2129 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2130 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2132 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2133 recog_memoized (hint);
2134 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2135 HINTED_P (branch) = 1;
2137 if (GET_CODE (target) == LABEL_REF)
2138 HINTED_P (XEXP (target, 0)) = 1;
2139 else if (tablejump_p (branch, 0, &table))
2141 rtvec vec;
2142 int j;
2143 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2144 vec = XVEC (PATTERN (table), 0);
2145 else
2146 vec = XVEC (PATTERN (table), 1);
2147 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2148 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2151 if (distance >= 588)
2153 /* Make sure the hint isn't scheduled any earlier than this point,
2154 which could make it too far for the branch offset to fit. */
2155 insn = emit_insn_before (gen_blockage (), hint);
2156 recog_memoized (insn);
2157 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2159 else if (distance <= 8 * 4)
2161 /* To guarantee at least 8 insns between the hint and branch we
2162 insert nops. */
2163 int d;
2164 for (d = distance; d < 8 * 4; d += 4)
2166 insn =
2167 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2168 recog_memoized (insn);
2169 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2172 /* Make sure any nops inserted aren't scheduled before the hint. */
2173 insn = emit_insn_after (gen_blockage (), hint);
2174 recog_memoized (insn);
2175 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2177 /* Make sure any nops inserted aren't scheduled after the call. */
2178 if (CALL_P (branch) && distance < 8 * 4)
2180 insn = emit_insn_before (gen_blockage (), branch);
2181 recog_memoized (insn);
2182 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2187 /* Return 0 if we don't want a hint for this branch. Otherwise return
2188 the rtx for the branch target. */
2189 static rtx
2190 get_branch_target (rtx_insn *branch)
2192 if (JUMP_P (branch))
2194 rtx set, src;
2196 /* Return statements */
2197 if (GET_CODE (PATTERN (branch)) == RETURN)
2198 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2200 /* ASM GOTOs. */
2201 if (extract_asm_operands (PATTERN (branch)) != NULL)
2202 return NULL;
2204 set = single_set (branch);
2205 src = SET_SRC (set);
2206 if (GET_CODE (SET_DEST (set)) != PC)
2207 abort ();
2209 if (GET_CODE (src) == IF_THEN_ELSE)
2211 rtx lab = 0;
2212 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2213 if (note)
2215 /* If the more probable case is not a fall through, then
2216 try a branch hint. */
2217 int prob = XINT (note, 0);
2218 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2219 && GET_CODE (XEXP (src, 1)) != PC)
2220 lab = XEXP (src, 1);
2221 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2222 && GET_CODE (XEXP (src, 2)) != PC)
2223 lab = XEXP (src, 2);
2225 if (lab)
2227 if (GET_CODE (lab) == RETURN)
2228 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2229 return lab;
2231 return 0;
2234 return src;
2236 else if (CALL_P (branch))
2238 rtx call;
2239 /* All of our call patterns are in a PARALLEL and the CALL is
2240 the first pattern in the PARALLEL. */
2241 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2242 abort ();
2243 call = XVECEXP (PATTERN (branch), 0, 0);
2244 if (GET_CODE (call) == SET)
2245 call = SET_SRC (call);
2246 if (GET_CODE (call) != CALL)
2247 abort ();
2248 return XEXP (XEXP (call, 0), 0);
2250 return 0;
2253 /* The special $hbr register is used to prevent the insn scheduler from
2254 moving hbr insns across instructions which invalidate them. It
2255 should only be used in a clobber, and this function searches for
2256 insns which clobber it. */
2257 static bool
2258 insn_clobbers_hbr (rtx_insn *insn)
2260 if (INSN_P (insn)
2261 && GET_CODE (PATTERN (insn)) == PARALLEL)
2263 rtx parallel = PATTERN (insn);
2264 rtx clobber;
2265 int j;
2266 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2268 clobber = XVECEXP (parallel, 0, j);
2269 if (GET_CODE (clobber) == CLOBBER
2270 && GET_CODE (XEXP (clobber, 0)) == REG
2271 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2272 return 1;
2275 return 0;
2278 /* Search up to 32 insns starting at FIRST:
2279 - at any kind of hinted branch, just return
2280 - at any unconditional branch in the first 15 insns, just return
2281 - at a call or indirect branch, after the first 15 insns, force it to
2282 an even address and return
2283 - at any unconditional branch, after the first 15 insns, force it to
2284 an even address.
2285 At the end of the search, insert an hbrp within 4 insns of FIRST,
2286 and an hbrp within 16 instructions of FIRST.
2287 */
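/* A rough sketch of the placement done below (byte offsets are relative
   to FIRST and purely illustrative): before_4 is the first insn that
   reaches offset 16 (4 insns in) and before_16 the first insn that
   reaches offset 56 (14 insns in, leaving room for the up-to-2 insns
   the first hbrp can add), so the result looks roughly like

       0x00  FIRST
       ...    hbrp #1 emitted before the insn near 0x10 (plus an lnop
              when it would otherwise be too close to a hint)
       ...    hbrp #2 emitted before the insn near 0x38 (likewise)  */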
2288 static void
2289 insert_hbrp_for_ilb_runout (rtx_insn *first)
2291 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2292 int addr = 0, length, first_addr = -1;
2293 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2294 int insert_lnop_after = 0;
2295 for (insn = first; insn; insn = NEXT_INSN (insn))
2296 if (INSN_P (insn))
2298 if (first_addr == -1)
2299 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2300 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2301 length = get_attr_length (insn);
2303 if (before_4 == 0 && addr + length >= 4 * 4)
2304 before_4 = insn;
2305 /* We test for 14 instructions because the first hbrp will add
2306 up to 2 instructions. */
2307 if (before_16 == 0 && addr + length >= 14 * 4)
2308 before_16 = insn;
2310 if (INSN_CODE (insn) == CODE_FOR_hbr)
2312 /* Make sure an hbrp is at least 2 cycles away from a hint.
2313 Insert an lnop after the hbrp when necessary. */
2314 if (before_4 == 0 && addr > 0)
2316 before_4 = insn;
2317 insert_lnop_after |= 1;
2319 else if (before_4 && addr <= 4 * 4)
2320 insert_lnop_after |= 1;
2321 if (before_16 == 0 && addr > 10 * 4)
2323 before_16 = insn;
2324 insert_lnop_after |= 2;
2326 else if (before_16 && addr <= 14 * 4)
2327 insert_lnop_after |= 2;
2330 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2332 if (addr < hbrp_addr0)
2333 hbrp_addr0 = addr;
2334 else if (addr < hbrp_addr1)
2335 hbrp_addr1 = addr;
2338 if (CALL_P (insn) || JUMP_P (insn))
2340 if (HINTED_P (insn))
2341 return;
2343 /* Any branch after the first 15 insns should be on an even
2344 address to avoid a special case branch. There might be
2345 some nops and/or hbrps inserted, so we test after 10
2346 insns. */
2347 if (addr > 10 * 4)
2348 SCHED_ON_EVEN_P (insn) = 1;
2351 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2352 return;
2355 if (addr + length >= 32 * 4)
2357 gcc_assert (before_4 && before_16);
2358 if (hbrp_addr0 > 4 * 4)
2360 insn =
2361 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2362 recog_memoized (insn);
2363 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2364 INSN_ADDRESSES_NEW (insn,
2365 INSN_ADDRESSES (INSN_UID (before_4)));
2366 PUT_MODE (insn, GET_MODE (before_4));
2367 PUT_MODE (before_4, TImode);
2368 if (insert_lnop_after & 1)
2370 insn = emit_insn_before (gen_lnop (), before_4);
2371 recog_memoized (insn);
2372 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2373 INSN_ADDRESSES_NEW (insn,
2374 INSN_ADDRESSES (INSN_UID (before_4)));
2375 PUT_MODE (insn, TImode);
2378 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2379 && hbrp_addr1 > 16 * 4)
2381 insn =
2382 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2383 recog_memoized (insn);
2384 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2385 INSN_ADDRESSES_NEW (insn,
2386 INSN_ADDRESSES (INSN_UID (before_16)));
2387 PUT_MODE (insn, GET_MODE (before_16));
2388 PUT_MODE (before_16, TImode);
2389 if (insert_lnop_after & 2)
2391 insn = emit_insn_before (gen_lnop (), before_16);
2392 recog_memoized (insn);
2393 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2394 INSN_ADDRESSES_NEW (insn,
2395 INSN_ADDRESSES (INSN_UID
2396 (before_16)));
2397 PUT_MODE (insn, TImode);
2400 return;
2403 else if (BARRIER_P (insn))
2404 return;
2408 /* The SPU might hang when it executes 48 inline instructions after a
2409 hinted branch jumps to its hinted target. The beginning of a
2410 function and the return from a call might have been hinted, and
2411 must be handled as well. To prevent a hang we insert 2 hbrps. The
2412 first should be within 6 insns of the branch target. The second
2413 should be within 22 insns of the branch target. When determining
2414 if hbrps are necessary, we look for only 32 inline instructions,
2415 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2416 when inserting new hbrps, we insert them within 4 and 16 insns of
2417 the target. */
2418 static void
2419 insert_hbrp (void)
2421 rtx_insn *insn;
2422 if (TARGET_SAFE_HINTS)
2424 shorten_branches (get_insns ());
2425 /* Insert hbrp at beginning of function */
2426 insn = next_active_insn (get_insns ());
2427 if (insn)
2428 insert_hbrp_for_ilb_runout (insn);
2429 /* Insert hbrp after hinted targets. */
2430 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2431 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2432 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2436 static int in_spu_reorg;
2438 static void
2439 spu_var_tracking (void)
2441 if (flag_var_tracking)
2443 df_analyze ();
2444 timevar_push (TV_VAR_TRACKING);
2445 variable_tracking_main ();
2446 timevar_pop (TV_VAR_TRACKING);
2447 df_finish_pass (false);
2451 /* Insert branch hints. There are no branch optimizations after this
2452 pass, so it's safe to set our branch hints now. */
2453 static void
2454 spu_machine_dependent_reorg (void)
2456 sbitmap blocks;
2457 basic_block bb;
2458 rtx_insn *branch, *insn;
2459 rtx branch_target = 0;
2460 int branch_addr = 0, insn_addr, required_dist = 0;
2461 int i;
2462 unsigned int j;
2464 if (!TARGET_BRANCH_HINTS || optimize == 0)
2466 /* We still do it for unoptimized code because an external
2467 function might have hinted a call or return. */
2468 compute_bb_for_insn ();
2469 insert_hbrp ();
2470 pad_bb ();
2471 spu_var_tracking ();
2472 free_bb_for_insn ();
2473 return;
2476 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2477 bitmap_clear (blocks);
2479 in_spu_reorg = 1;
2480 compute_bb_for_insn ();
2482 /* (Re-)discover loops so that bb->loop_father can be used
2483 in the analysis below. */
2484 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2486 compact_blocks ();
2488 spu_bb_info =
2489 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2490 sizeof (struct spu_bb_info));
2492 /* We need exact insn addresses and lengths. */
2493 shorten_branches (get_insns ());
2495 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2497 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2498 branch = 0;
2499 if (spu_bb_info[i].prop_jump)
2501 branch = spu_bb_info[i].prop_jump;
2502 branch_target = get_branch_target (branch);
2503 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2504 required_dist = spu_hint_dist;
2506 /* Search from end of a block to beginning. In this loop, find
2507 jumps which need a branch hint and emit the hint only when:
2508 - it's an indirect branch and we're at the insn which sets
2509 the register
2510 - we're at an insn that will invalidate the hint. e.g., a
2511 call, another hint insn, inline asm that clobbers $hbr, and
2512 some inlined operations (divmodsi4). Don't consider jumps
2513 because they are only at the end of a block and are
2514 considered when we are deciding whether to propagate
2515 - we're getting too far away from the branch. The hbr insns
2516 only have a signed 10 bit offset
2517 We go back as far as possible so the branch will be considered
2518 for propagation when we get to the beginning of the block. */
2519 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2521 if (INSN_P (insn))
2523 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2524 if (branch
2525 && ((GET_CODE (branch_target) == REG
2526 && set_of (branch_target, insn) != NULL_RTX)
2527 || insn_clobbers_hbr (insn)
2528 || branch_addr - insn_addr > 600))
2530 rtx_insn *next = NEXT_INSN (insn);
2531 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2532 if (insn != BB_END (bb)
2533 && branch_addr - next_addr >= required_dist)
2535 if (dump_file)
2536 fprintf (dump_file,
2537 "hint for %i in block %i before %i\n",
2538 INSN_UID (branch), bb->index,
2539 INSN_UID (next));
2540 spu_emit_branch_hint (next, branch, branch_target,
2541 branch_addr - next_addr, blocks);
2543 branch = 0;
2546 /* JUMP_P will only be true at the end of a block. When
2547 branch is already set it means we've previously decided
2548 to propagate a hint for that branch into this block. */
2549 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2551 branch = 0;
2552 if ((branch_target = get_branch_target (insn)))
2554 branch = insn;
2555 branch_addr = insn_addr;
2556 required_dist = spu_hint_dist;
2560 if (insn == BB_HEAD (bb))
2561 break;
2564 if (branch)
2566 /* If we haven't emitted a hint for this branch yet, it might
2567 be profitable to emit it in one of the predecessor blocks,
2568 especially for loops. */
2569 rtx_insn *bbend;
2570 basic_block prev = 0, prop = 0, prev2 = 0;
2571 int loop_exit = 0, simple_loop = 0;
2572 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2574 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2575 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2576 prev = EDGE_PRED (bb, j)->src;
2577 else
2578 prev2 = EDGE_PRED (bb, j)->src;
2580 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2581 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2582 loop_exit = 1;
2583 else if (EDGE_SUCC (bb, j)->dest == bb)
2584 simple_loop = 1;
2586 /* If this branch is a loop exit then propagate to previous
2587 fallthru block. This catches the cases when it is a simple
2588 loop or when there is an initial branch into the loop. */
2589 if (prev && (loop_exit || simple_loop)
2590 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2591 prop = prev;
2593 /* If there is only one adjacent predecessor, propagate to it, but
2594 don't propagate outside this loop. */
2595 else if (prev && single_pred_p (bb)
2596 && prev->loop_father == bb->loop_father)
2597 prop = prev;
2599 /* If this is the JOIN block of a simple IF-THEN then
2600 propagate the hint to the HEADER block. */
2601 else if (prev && prev2
2602 && EDGE_COUNT (bb->preds) == 2
2603 && EDGE_COUNT (prev->preds) == 1
2604 && EDGE_PRED (prev, 0)->src == prev2
2605 && prev2->loop_father == bb->loop_father
2606 && GET_CODE (branch_target) != REG)
2607 prop = prev;
2609 /* Don't propagate when:
2610 - this is a simple loop and the hint would be too far
2611 - this is not a simple loop and there are 16 insns in
2612 this block already
2613 - the predecessor block ends in a branch that will be
2614 hinted
2615 - the predecessor block ends in an insn that invalidates
2616 the hint */
2617 if (prop
2618 && prop->index >= 0
2619 && (bbend = BB_END (prop))
2620 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2621 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2622 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2624 if (dump_file)
2625 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2626 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2627 bb->index, prop->index, bb_loop_depth (bb),
2628 INSN_UID (branch), loop_exit, simple_loop,
2629 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2631 spu_bb_info[prop->index].prop_jump = branch;
2632 spu_bb_info[prop->index].bb_index = i;
2634 else if (branch_addr - next_addr >= required_dist)
2636 if (dump_file)
2637 fprintf (dump_file, "hint for %i in block %i before %i\n",
2638 INSN_UID (branch), bb->index,
2639 INSN_UID (NEXT_INSN (insn)));
2640 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2641 branch_addr - next_addr, blocks);
2643 branch = 0;
2646 free (spu_bb_info);
2648 if (!bitmap_empty_p (blocks))
2649 find_many_sub_basic_blocks (blocks);
2651 /* We have to schedule to make sure alignment is ok. */
2652 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2654 /* The hints need to be scheduled, so call it again. */
2655 schedule_insns ();
2656 df_finish_pass (true);
2658 insert_hbrp ();
2660 pad_bb ();
2662 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2663 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2665 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2666 between its branch label and the branch. We don't move the
2667 label because GCC expects it at the beginning of the block. */
2668 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2669 rtx label_ref = XVECEXP (unspec, 0, 0);
2670 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2671 rtx_insn *branch;
2672 int offset = 0;
2673 for (branch = NEXT_INSN (label);
2674 !JUMP_P (branch) && !CALL_P (branch);
2675 branch = NEXT_INSN (branch))
2676 if (NONJUMP_INSN_P (branch))
2677 offset += get_attr_length (branch);
2678 if (offset > 0)
2679 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2682 spu_var_tracking ();
2684 loop_optimizer_finalize ();
2686 free_bb_for_insn ();
2688 in_spu_reorg = 0;
2692 /* Insn scheduling routines, primarily for dual issue. */
2693 static int
2694 spu_sched_issue_rate (void)
2696 return 2;
2699 static int
2700 uses_ls_unit(rtx_insn *insn)
2702 rtx set = single_set (insn);
2703 if (set != 0
2704 && (GET_CODE (SET_DEST (set)) == MEM
2705 || GET_CODE (SET_SRC (set)) == MEM))
2706 return 1;
2707 return 0;
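/* Classify INSN by issue pipeline based on its type attribute: 0 for
   even-pipe types (fx2, fx3, spr, nop, fxb, fpd, fp6, fp7), 1 for
   odd-pipe types (lnop, shuf, load, store, br, multi1, hbr, iprefetch),
   -1 for inline asm and multi0, and -2 for converts.  */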
2710 static int
2711 get_pipe (rtx_insn *insn)
2713 enum attr_type t;
2714 /* Handle inline asm */
2715 if (INSN_CODE (insn) == -1)
2716 return -1;
2717 t = get_attr_type (insn);
2718 switch (t)
2720 case TYPE_CONVERT:
2721 return -2;
2722 case TYPE_MULTI0:
2723 return -1;
2725 case TYPE_FX2:
2726 case TYPE_FX3:
2727 case TYPE_SPR:
2728 case TYPE_NOP:
2729 case TYPE_FXB:
2730 case TYPE_FPD:
2731 case TYPE_FP6:
2732 case TYPE_FP7:
2733 return 0;
2735 case TYPE_LNOP:
2736 case TYPE_SHUF:
2737 case TYPE_LOAD:
2738 case TYPE_STORE:
2739 case TYPE_BR:
2740 case TYPE_MULTI1:
2741 case TYPE_HBR:
2742 case TYPE_IPREFETCH:
2743 return 1;
2744 default:
2745 abort ();
2750 /* haifa-sched.c has a static variable that keeps track of the current
2751 cycle. It is passed to spu_sched_reorder, and we record it here for
2752 use by spu_sched_variable_issue. It won't be accurate if the
2753 scheduler updates its clock_var between the two calls. */
2754 static int clock_var;
2756 /* This is used to keep track of insn alignment. Set to 0 at the
2757 beginning of each block and increased by the "length" attr of each
2758 insn scheduled. */
2759 static int spu_sched_length;
2761 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2762 ready list appropriately in spu_sched_reorder(). */
2763 static int pipe0_clock;
2764 static int pipe1_clock;
2766 static int prev_clock_var;
2768 static int prev_priority;
2770 /* The SPU needs to load the next ilb sometime during the execution of
2771 the previous ilb. There is a potential conflict if every cycle has a
2772 load or store. To avoid the conflict we make sure the load/store
2773 unit is free for at least one cycle during the execution of insns in
2774 the previous ilb. */
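/* Concretely (see spu_sched_reorder below): spu_ls_first records the
   byte offset of the first insn in an unbroken run of load/store
   cycles, and once spu_sched_length - spu_ls_first reaches 15 insns
   (60 bytes) an hbrp is emitted instead of another load/store so the
   instruction fetch gets a free local-store cycle.  */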
2775 static int spu_ls_first;
2776 static int prev_ls_clock;
2778 static void
2779 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2780 int max_ready ATTRIBUTE_UNUSED)
2782 spu_sched_length = 0;
2785 static void
2786 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2787 int max_ready ATTRIBUTE_UNUSED)
2789 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2791 /* When any block might be at least 8-byte aligned, assume they
2792 will all be at least 8-byte aligned to make sure dual issue
2793 works out correctly. */
2794 spu_sched_length = 0;
2796 spu_ls_first = INT_MAX;
2797 clock_var = -1;
2798 prev_ls_clock = -1;
2799 pipe0_clock = -1;
2800 pipe1_clock = -1;
2801 prev_clock_var = -1;
2802 prev_priority = -1;
2805 static int
2806 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2807 int verbose ATTRIBUTE_UNUSED,
2808 rtx_insn *insn, int more)
2810 int len;
2811 int p;
2812 if (GET_CODE (PATTERN (insn)) == USE
2813 || GET_CODE (PATTERN (insn)) == CLOBBER
2814 || (len = get_attr_length (insn)) == 0)
2815 return more;
2817 spu_sched_length += len;
2819 /* Reset on inline asm */
2820 if (INSN_CODE (insn) == -1)
2822 spu_ls_first = INT_MAX;
2823 pipe0_clock = -1;
2824 pipe1_clock = -1;
2825 return 0;
2827 p = get_pipe (insn);
2828 if (p == 0)
2829 pipe0_clock = clock_var;
2830 else
2831 pipe1_clock = clock_var;
2833 if (in_spu_reorg)
2835 if (clock_var - prev_ls_clock > 1
2836 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2837 spu_ls_first = INT_MAX;
2838 if (uses_ls_unit (insn))
2840 if (spu_ls_first == INT_MAX)
2841 spu_ls_first = spu_sched_length;
2842 prev_ls_clock = clock_var;
2845 /* The scheduler hasn't inserted the nop, but we will later on.
2846 Include those nops in spu_sched_length. */
2847 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2848 spu_sched_length += 4;
2849 prev_clock_var = clock_var;
2851 /* more is -1 when called from spu_sched_reorder for new insns
2852 that don't have INSN_PRIORITY */
2853 if (more >= 0)
2854 prev_priority = INSN_PRIORITY (insn);
2857 /* Always try issuing more insns. spu_sched_reorder will decide
2858 when the cycle should be advanced. */
2859 return 1;
2862 /* This function is called for both TARGET_SCHED_REORDER and
2863 TARGET_SCHED_REORDER2. */
2864 static int
2865 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2866 rtx_insn **ready, int *nreadyp, int clock)
2868 int i, nready = *nreadyp;
2869 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2870 rtx_insn *insn;
2872 clock_var = clock;
2874 if (nready <= 0 || pipe1_clock >= clock)
2875 return 0;
2877 /* Find any rtl insns that don't generate assembly insns and schedule
2878 them first. */
2879 for (i = nready - 1; i >= 0; i--)
2881 insn = ready[i];
2882 if (INSN_CODE (insn) == -1
2883 || INSN_CODE (insn) == CODE_FOR_blockage
2884 || (INSN_P (insn) && get_attr_length (insn) == 0))
2886 ready[i] = ready[nready - 1];
2887 ready[nready - 1] = insn;
2888 return 1;
2892 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2893 for (i = 0; i < nready; i++)
2894 if (INSN_CODE (ready[i]) != -1)
2896 insn = ready[i];
2897 switch (get_attr_type (insn))
2899 default:
2900 case TYPE_MULTI0:
2901 case TYPE_CONVERT:
2902 case TYPE_FX2:
2903 case TYPE_FX3:
2904 case TYPE_SPR:
2905 case TYPE_NOP:
2906 case TYPE_FXB:
2907 case TYPE_FPD:
2908 case TYPE_FP6:
2909 case TYPE_FP7:
2910 pipe_0 = i;
2911 break;
2912 case TYPE_LOAD:
2913 case TYPE_STORE:
2914 pipe_ls = i;
2915 case TYPE_LNOP:
2916 case TYPE_SHUF:
2917 case TYPE_BR:
2918 case TYPE_MULTI1:
2919 case TYPE_HBR:
2920 pipe_1 = i;
2921 break;
2922 case TYPE_IPREFETCH:
2923 pipe_hbrp = i;
2924 break;
2928 /* In the first scheduling phase, schedule loads and stores together
2929 to increase the chance they will get merged during postreload CSE. */
2930 if (!reload_completed && pipe_ls >= 0)
2932 insn = ready[pipe_ls];
2933 ready[pipe_ls] = ready[nready - 1];
2934 ready[nready - 1] = insn;
2935 return 1;
2938 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2939 if (pipe_hbrp >= 0)
2940 pipe_1 = pipe_hbrp;
2942 /* When we have loads/stores in every cycle of the last 15 insns and
2943 we are about to schedule another load/store, emit an hbrp insn
2944 instead. */
2945 if (in_spu_reorg
2946 && spu_sched_length - spu_ls_first >= 4 * 15
2947 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2949 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2950 recog_memoized (insn);
2951 if (pipe0_clock < clock)
2952 PUT_MODE (insn, TImode);
2953 spu_sched_variable_issue (file, verbose, insn, -1);
2954 return 0;
2957 /* In general, we want to emit nops to increase dual issue, but dual
2958 issue isn't faster when one of the insns could be scheduled later
2959 without affecting the critical path. We look at INSN_PRIORITY to
2960 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2961 used to affect it. */
2962 if (in_spu_reorg && spu_dual_nops < 10)
2964 /* When we are at an even address and we are not issuing nops to
2965 improve scheduling then we need to advance the cycle. */
2966 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2967 && (spu_dual_nops == 0
2968 || (pipe_1 != -1
2969 && prev_priority >
2970 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2971 return 0;
2973 /* When at an odd address, schedule the highest priority insn
2974 without considering pipeline. */
2975 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2976 && (spu_dual_nops == 0
2977 || (prev_priority >
2978 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2979 return 1;
2983 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2984 pipe0 insn in the ready list, schedule it. */
2985 if (pipe0_clock < clock && pipe_0 >= 0)
2986 schedule_i = pipe_0;
2988 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2989 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2990 else
2991 schedule_i = pipe_1;
2993 if (schedule_i > -1)
2995 insn = ready[schedule_i];
2996 ready[schedule_i] = ready[nready - 1];
2997 ready[nready - 1] = insn;
2998 return 1;
3000 return 0;
3003 /* INSN is dependent on DEP_INSN. */
3004 static int
3005 spu_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep_insn, int cost)
3007 rtx set;
3009 /* The blockage pattern is used to prevent instructions from being
3010 moved across it and has no cost. */
3011 if (INSN_CODE (insn) == CODE_FOR_blockage
3012 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3013 return 0;
3015 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3016 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3017 return 0;
3019 /* Make sure hbrps are spread out. */
3020 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3021 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3022 return 8;
3024 /* Make sure hints and hbrps are 2 cycles apart. */
3025 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3026 || INSN_CODE (insn) == CODE_FOR_hbr)
3027 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3028 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3029 return 2;
3031 /* An hbrp has no real dependency on other insns. */
3032 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3033 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3034 return 0;
3036 /* Assuming that it is unlikely an argument register will be used in
3037 the first cycle of the called function, we reduce the cost for
3038 slightly better scheduling of dep_insn. When not hinted, the
3039 mispredicted branch would hide the cost as well. */
3040 if (CALL_P (insn))
3042 rtx target = get_branch_target (insn);
3043 if (GET_CODE (target) != REG || !set_of (target, insn))
3044 return cost - 2;
3045 return cost;
3048 /* And when returning from a function, let's assume the return values
3049 are completed sooner too. */
3050 if (CALL_P (dep_insn))
3051 return cost - 2;
3053 /* Make sure an instruction that loads from the back chain is scheduled
3054 away from the return instruction so a hint is more likely to get
3055 issued. */
3056 if (INSN_CODE (insn) == CODE_FOR__return
3057 && (set = single_set (dep_insn))
3058 && GET_CODE (SET_DEST (set)) == REG
3059 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3060 return 20;
3062 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3063 scheduler makes every insn in a block anti-dependent on the final
3064 jump_insn. We adjust here so higher cost insns will get scheduled
3065 earlier. */
3066 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3067 return insn_cost (dep_insn) - 3;
3069 return cost;
3072 /* Create a CONST_DOUBLE from a string. */
3074 spu_float_const (const char *string, machine_mode mode)
3076 REAL_VALUE_TYPE value;
3077 value = REAL_VALUE_ATOF (string, mode);
3078 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3082 spu_constant_address_p (rtx x)
3084 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3085 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3086 || GET_CODE (x) == HIGH);
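/* Classify the SImode constant VAL by which single immediate-load
   instruction can materialize it.  A few illustrative values:
     0x00001234  ->  SPU_IL    (fits the 16-bit signed il range)
     0x00012345  ->  SPU_ILA   (fits the 18-bit unsigned ila range)
     0x12341234  ->  SPU_ILH   (both halfwords identical)
     0x12340000  ->  SPU_ILHU  (low halfword zero)
   Anything else returns SPU_NONE and needs more than one instruction.  */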
3089 static enum spu_immediate
3090 which_immediate_load (HOST_WIDE_INT val)
3092 gcc_assert (val == trunc_int_for_mode (val, SImode));
3094 if (val >= -0x8000 && val <= 0x7fff)
3095 return SPU_IL;
3096 if (val >= 0 && val <= 0x3ffff)
3097 return SPU_ILA;
3098 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3099 return SPU_ILH;
3100 if ((val & 0xffff) == 0)
3101 return SPU_ILHU;
3103 return SPU_NONE;
3106 /* Return true when OP can be loaded by one of the il instructions, or
3107 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3109 immediate_load_p (rtx op, machine_mode mode)
3111 if (CONSTANT_P (op))
3113 enum immediate_class c = classify_immediate (op, mode);
3114 return c == IC_IL1 || c == IC_IL1s
3115 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3117 return 0;
3120 /* Return true if the first SIZE bytes of ARR form a constant that can be
3121 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3122 represent the size and offset of the instruction to use. */
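/* For illustration (a hypothetical input, not from the original source):
   the check below accepts the 16-byte array

       10 11 12 13  00 01 02 03  18 19 1a 1b  1c 1d 1e 1f

   i.e. the identity bytes 0x10..0x1f with a run 00 01 02 03 of length 4
   starting at offset 4, which matches the shuffle pattern a cwd with
   offset 4 produces; it is reported as run == 4, start == 4.  */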
3123 static int
3124 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3126 int cpat, run, i, start;
3127 cpat = 1;
3128 run = 0;
3129 start = -1;
3130 for (i = 0; i < size && cpat; i++)
3131 if (arr[i] != i+16)
3133 if (!run)
3135 start = i;
3136 if (arr[i] == 3)
3137 run = 1;
3138 else if (arr[i] == 2 && arr[i+1] == 3)
3139 run = 2;
3140 else if (arr[i] == 0)
3142 while (arr[i+run] == run && i+run < 16)
3143 run++;
3144 if (run != 4 && run != 8)
3145 cpat = 0;
3147 else
3148 cpat = 0;
3149 if ((i & (run-1)) != 0)
3150 cpat = 0;
3151 i += run;
3153 else
3154 cpat = 0;
3156 if (cpat && (run || size < 16))
3158 if (run == 0)
3159 run = 1;
3160 if (prun)
3161 *prun = run;
3162 if (pstart)
3163 *pstart = start == -1 ? 16-run : start;
3164 return 1;
3166 return 0;
3169 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3170 it into a register. MODE is only valid when OP is a CONST_INT. */
3171 static enum immediate_class
3172 classify_immediate (rtx op, machine_mode mode)
3174 HOST_WIDE_INT val;
3175 unsigned char arr[16];
3176 int i, j, repeated, fsmbi, repeat;
3178 gcc_assert (CONSTANT_P (op));
3180 if (GET_MODE (op) != VOIDmode)
3181 mode = GET_MODE (op);
3183 /* A V4SI const_vector with all identical symbols is ok. */
3184 if (!flag_pic
3185 && mode == V4SImode
3186 && GET_CODE (op) == CONST_VECTOR
3187 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3188 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3189 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3190 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3191 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3192 op = CONST_VECTOR_ELT (op, 0);
3194 switch (GET_CODE (op))
3196 case SYMBOL_REF:
3197 case LABEL_REF:
3198 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3200 case CONST:
3201 /* We can never know if the resulting address fits in 18 bits and can be
3202 loaded with ila. For now, assume the address will not overflow if
3203 the displacement is "small" (fits 'K' constraint). */
3204 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3206 rtx sym = XEXP (XEXP (op, 0), 0);
3207 rtx cst = XEXP (XEXP (op, 0), 1);
3209 if (GET_CODE (sym) == SYMBOL_REF
3210 && GET_CODE (cst) == CONST_INT
3211 && satisfies_constraint_K (cst))
3212 return IC_IL1s;
3214 return IC_IL2s;
3216 case HIGH:
3217 return IC_IL1s;
3219 case CONST_VECTOR:
3220 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3221 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3222 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3223 return IC_POOL;
3224 /* Fall through. */
3226 case CONST_INT:
3227 case CONST_DOUBLE:
3228 constant_to_array (mode, op, arr);
3230 /* Check that each 4-byte slot is identical. */
3231 repeated = 1;
3232 for (i = 4; i < 16; i += 4)
3233 for (j = 0; j < 4; j++)
3234 if (arr[j] != arr[i + j])
3235 repeated = 0;
3237 if (repeated)
3239 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3240 val = trunc_int_for_mode (val, SImode);
3242 if (which_immediate_load (val) != SPU_NONE)
3243 return IC_IL1;
3246 /* Any mode of 2 bytes or smaller can be loaded with an il
3247 instruction. */
3248 gcc_assert (GET_MODE_SIZE (mode) > 2);
3250 fsmbi = 1;
3251 repeat = 0;
3252 for (i = 0; i < 16 && fsmbi; i++)
3253 if (arr[i] != 0 && repeat == 0)
3254 repeat = arr[i];
3255 else if (arr[i] != 0 && arr[i] != repeat)
3256 fsmbi = 0;
3257 if (fsmbi)
3258 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3260 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3261 return IC_CPAT;
3263 if (repeated)
3264 return IC_IL2;
3266 return IC_POOL;
3267 default:
3268 break;
3270 gcc_unreachable ();
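/* Classify the SImode constant VAL by which logical-immediate
   instruction (ori, iohl, orhi or orbi) has an immediate field that can
   encode it.  A few illustrative values:
     0x00000003  ->  SPU_ORI   (fits the 10-bit signed ori range)
     0x00001234  ->  SPU_IOHL  (fits the 16-bit unsigned iohl field)
     0x00050005  ->  SPU_ORHI  (repeated halfword, small value)
     0x07070707  ->  SPU_ORBI  (repeated byte, small value)
   Anything else returns SPU_NONE.  */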
3273 static enum spu_immediate
3274 which_logical_immediate (HOST_WIDE_INT val)
3276 gcc_assert (val == trunc_int_for_mode (val, SImode));
3278 if (val >= -0x200 && val <= 0x1ff)
3279 return SPU_ORI;
3280 if (val >= 0 && val <= 0xffff)
3281 return SPU_IOHL;
3282 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3284 val = trunc_int_for_mode (val, HImode);
3285 if (val >= -0x200 && val <= 0x1ff)
3286 return SPU_ORHI;
3287 if ((val & 0xff) == ((val >> 8) & 0xff))
3289 val = trunc_int_for_mode (val, QImode);
3290 if (val >= -0x200 && val <= 0x1ff)
3291 return SPU_ORBI;
3294 return SPU_NONE;
3297 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3298 CONST_DOUBLEs. */
3299 static int
3300 const_vector_immediate_p (rtx x)
3302 int i;
3303 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3304 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3305 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3306 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3307 return 0;
3308 return 1;
3312 logical_immediate_p (rtx op, machine_mode mode)
3314 HOST_WIDE_INT val;
3315 unsigned char arr[16];
3316 int i, j;
3318 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3319 || GET_CODE (op) == CONST_VECTOR);
3321 if (GET_CODE (op) == CONST_VECTOR
3322 && !const_vector_immediate_p (op))
3323 return 0;
3325 if (GET_MODE (op) != VOIDmode)
3326 mode = GET_MODE (op);
3328 constant_to_array (mode, op, arr);
3330 /* Check that bytes are repeated. */
3331 for (i = 4; i < 16; i += 4)
3332 for (j = 0; j < 4; j++)
3333 if (arr[j] != arr[i + j])
3334 return 0;
3336 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3337 val = trunc_int_for_mode (val, SImode);
3339 i = which_logical_immediate (val);
3340 return i != SPU_NONE && i != SPU_IOHL;
3344 iohl_immediate_p (rtx op, machine_mode mode)
3346 HOST_WIDE_INT val;
3347 unsigned char arr[16];
3348 int i, j;
3350 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3351 || GET_CODE (op) == CONST_VECTOR);
3353 if (GET_CODE (op) == CONST_VECTOR
3354 && !const_vector_immediate_p (op))
3355 return 0;
3357 if (GET_MODE (op) != VOIDmode)
3358 mode = GET_MODE (op);
3360 constant_to_array (mode, op, arr);
3362 /* Check that bytes are repeated. */
3363 for (i = 4; i < 16; i += 4)
3364 for (j = 0; j < 4; j++)
3365 if (arr[j] != arr[i + j])
3366 return 0;
3368 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3369 val = trunc_int_for_mode (val, SImode);
3371 return val >= 0 && val <= 0xffff;
3375 arith_immediate_p (rtx op, machine_mode mode,
3376 HOST_WIDE_INT low, HOST_WIDE_INT high)
3378 HOST_WIDE_INT val;
3379 unsigned char arr[16];
3380 int bytes, i, j;
3382 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3383 || GET_CODE (op) == CONST_VECTOR);
3385 if (GET_CODE (op) == CONST_VECTOR
3386 && !const_vector_immediate_p (op))
3387 return 0;
3389 if (GET_MODE (op) != VOIDmode)
3390 mode = GET_MODE (op);
3392 constant_to_array (mode, op, arr);
3394 mode = GET_MODE_INNER (mode);
3395 bytes = GET_MODE_SIZE (mode);
3396 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3398 /* Check that bytes are repeated. */
3399 for (i = bytes; i < 16; i += bytes)
3400 for (j = 0; j < bytes; j++)
3401 if (arr[j] != arr[i + j])
3402 return 0;
3404 val = arr[0];
3405 for (j = 1; j < bytes; j++)
3406 val = (val << 8) | arr[j];
3408 val = trunc_int_for_mode (val, mode);
3410 return val >= low && val <= high;
3413 /* TRUE when op is an immediate and an exact power of 2, and given that
3414 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3415 all entries must be the same. */
3416 bool
3417 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3419 machine_mode int_mode;
3420 HOST_WIDE_INT val;
3421 unsigned char arr[16];
3422 int bytes, i, j;
3424 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3425 || GET_CODE (op) == CONST_VECTOR);
3427 if (GET_CODE (op) == CONST_VECTOR
3428 && !const_vector_immediate_p (op))
3429 return 0;
3431 if (GET_MODE (op) != VOIDmode)
3432 mode = GET_MODE (op);
3434 constant_to_array (mode, op, arr);
3436 if (VECTOR_MODE_P (mode))
3437 mode = GET_MODE_INNER (mode);
3439 bytes = GET_MODE_SIZE (mode);
3440 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3442 /* Check that bytes are repeated. */
3443 for (i = bytes; i < 16; i += bytes)
3444 for (j = 0; j < bytes; j++)
3445 if (arr[j] != arr[i + j])
3446 return 0;
3448 val = arr[0];
3449 for (j = 1; j < bytes; j++)
3450 val = (val << 8) | arr[j];
3452 val = trunc_int_for_mode (val, int_mode);
3454 /* Currently, we only handle SFmode. */
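/* Assumes the IEEE-754 single-precision layout for SFmode: bits 30-23
   hold the biased exponent and bits 22-0 the mantissa, so a positive
   power of two has a zero mantissa and an exponent of (val >> 23) - 127. */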
3455 gcc_assert (mode == SFmode);
3456 if (mode == SFmode)
3458 int exp = (val >> 23) - 127;
3459 return val > 0 && (val & 0x007fffff) == 0
3460 && exp >= low && exp <= high;
3462 return FALSE;
3465 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3467 static bool
3468 ea_symbol_ref_p (const_rtx x)
3470 tree decl;
3472 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3474 rtx plus = XEXP (x, 0);
3475 rtx op0 = XEXP (plus, 0);
3476 rtx op1 = XEXP (plus, 1);
3477 if (GET_CODE (op1) == CONST_INT)
3478 x = op0;
3481 return (GET_CODE (x) == SYMBOL_REF
3482 && (decl = SYMBOL_REF_DECL (x)) != 0
3483 && TREE_CODE (decl) == VAR_DECL
3484 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3487 /* We accept:
3488 - any 32-bit constant (SImode, SFmode)
3489 - any constant that can be generated with fsmbi (any mode)
3490 - a 64-bit constant where the high and low bits are identical
3491 (DImode, DFmode)
3492 - a 128-bit constant where the four 32-bit words match. */
3493 bool
3494 spu_legitimate_constant_p (machine_mode mode, rtx x)
3496 subrtx_iterator::array_type array;
3497 if (GET_CODE (x) == HIGH)
3498 x = XEXP (x, 0);
3500 /* Reject any __ea qualified reference. These can't appear in
3501 instructions but must be forced to the constant pool. */
3502 FOR_EACH_SUBRTX (iter, array, x, ALL)
3503 if (ea_symbol_ref_p (*iter))
3504 return 0;
3506 /* V4SI with all identical symbols is valid. */
3507 if (!flag_pic
3508 && mode == V4SImode
3509 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3510 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3511 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3512 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3513 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3514 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3516 if (GET_CODE (x) == CONST_VECTOR
3517 && !const_vector_immediate_p (x))
3518 return 0;
3519 return 1;
3522 /* Valid addresses are:
3523 - symbol_ref, label_ref, const
3524 - reg
3525 - reg + const_int, where const_int is 16 byte aligned
3526 - reg + reg, alignment doesn't matter
3527 The alignment matters in the reg+const case because lqd and stqd
3528 ignore the 4 least significant bits of the const. We only care about
3529 16 byte modes because the expand phase will change all smaller MEM
3530 references to TImode. */
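/* Some illustrative examples (register numbers are arbitrary):
     (reg 3)                             valid
     (plus (reg 3) (const_int 32))       valid for a 16-byte access
     (plus (reg 3) (const_int 4))        not valid for a 16-byte access
                                         (offset not 16-byte aligned)
     (plus (reg 3) (const_int 0x2000))   not valid, displacement outside
                                         the signed range [-0x2000, 0x1fff]
     (plus (reg 3) (reg 4))              valid regardless of alignment  */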
3531 static bool
3532 spu_legitimate_address_p (machine_mode mode,
3533 rtx x, bool reg_ok_strict)
3535 int aligned = GET_MODE_SIZE (mode) >= 16;
3536 if (aligned
3537 && GET_CODE (x) == AND
3538 && GET_CODE (XEXP (x, 1)) == CONST_INT
3539 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3540 x = XEXP (x, 0);
3541 switch (GET_CODE (x))
3543 case LABEL_REF:
3544 return !TARGET_LARGE_MEM;
3546 case SYMBOL_REF:
3547 case CONST:
3548 /* Keep __ea references until reload so that spu_expand_mov can see them
3549 in MEMs. */
3550 if (ea_symbol_ref_p (x))
3551 return !reload_in_progress && !reload_completed;
3552 return !TARGET_LARGE_MEM;
3554 case CONST_INT:
3555 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3557 case SUBREG:
3558 x = XEXP (x, 0);
3559 if (REG_P (x))
3560 return 0;
3562 case REG:
3563 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3565 case PLUS:
3566 case LO_SUM:
3568 rtx op0 = XEXP (x, 0);
3569 rtx op1 = XEXP (x, 1);
3570 if (GET_CODE (op0) == SUBREG)
3571 op0 = XEXP (op0, 0);
3572 if (GET_CODE (op1) == SUBREG)
3573 op1 = XEXP (op1, 0);
3574 if (GET_CODE (op0) == REG
3575 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3576 && GET_CODE (op1) == CONST_INT
3577 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3578 /* If virtual registers are involved, the displacement will
3579 change later on anyway, so checking would be premature.
3580 Reload will make sure the final displacement after
3581 register elimination is OK. */
3582 || op0 == arg_pointer_rtx
3583 || op0 == frame_pointer_rtx
3584 || op0 == virtual_stack_vars_rtx)
3585 && (!aligned || (INTVAL (op1) & 15) == 0))
3586 return TRUE;
3587 if (GET_CODE (op0) == REG
3588 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3589 && GET_CODE (op1) == REG
3590 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3591 return TRUE;
3593 break;
3595 default:
3596 break;
3598 return FALSE;
3601 /* Like spu_legitimate_address_p, except with named addresses. */
3602 static bool
3603 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3604 bool reg_ok_strict, addr_space_t as)
3606 if (as == ADDR_SPACE_EA)
3607 return (REG_P (x) && (GET_MODE (x) == EAmode));
3609 else if (as != ADDR_SPACE_GENERIC)
3610 gcc_unreachable ();
3612 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3615 /* When the address is reg + const_int, force the const_int into a
3616 register. */
3617 static rtx
3618 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3619 machine_mode mode ATTRIBUTE_UNUSED)
3621 rtx op0, op1;
3622 /* Make sure both operands are registers. */
3623 if (GET_CODE (x) == PLUS)
3625 op0 = XEXP (x, 0);
3626 op1 = XEXP (x, 1);
3627 if (ALIGNED_SYMBOL_REF_P (op0))
3629 op0 = force_reg (Pmode, op0);
3630 mark_reg_pointer (op0, 128);
3632 else if (GET_CODE (op0) != REG)
3633 op0 = force_reg (Pmode, op0);
3634 if (ALIGNED_SYMBOL_REF_P (op1))
3636 op1 = force_reg (Pmode, op1);
3637 mark_reg_pointer (op1, 128);
3639 else if (GET_CODE (op1) != REG)
3640 op1 = force_reg (Pmode, op1);
3641 x = gen_rtx_PLUS (Pmode, op0, op1);
3643 return x;
3646 /* Like spu_legitimize_address, except with named address support. */
3647 static rtx
3648 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3649 addr_space_t as)
3651 if (as != ADDR_SPACE_GENERIC)
3652 return x;
3654 return spu_legitimize_address (x, oldx, mode);
3657 /* Reload reg + const_int for out-of-range displacements. */
3659 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3660 int opnum, int type)
3662 bool removed_and = false;
3664 if (GET_CODE (ad) == AND
3665 && CONST_INT_P (XEXP (ad, 1))
3666 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3668 ad = XEXP (ad, 0);
3669 removed_and = true;
3672 if (GET_CODE (ad) == PLUS
3673 && REG_P (XEXP (ad, 0))
3674 && CONST_INT_P (XEXP (ad, 1))
3675 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3676 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3678 /* Unshare the sum. */
3679 ad = copy_rtx (ad);
3681 /* Reload the displacement. */
3682 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3683 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3684 opnum, (enum reload_type) type);
3686 /* Add back AND for alignment if we stripped it. */
3687 if (removed_and)
3688 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3690 return ad;
3693 return NULL_RTX;
3696 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3697 struct attribute_spec.handler. */
3698 static tree
3699 spu_handle_fndecl_attribute (tree * node,
3700 tree name,
3701 tree args ATTRIBUTE_UNUSED,
3702 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3704 if (TREE_CODE (*node) != FUNCTION_DECL)
3706 warning (0, "%qE attribute only applies to functions",
3707 name);
3708 *no_add_attrs = true;
3711 return NULL_TREE;
3714 /* Handle the "vector" attribute. */
3715 static tree
3716 spu_handle_vector_attribute (tree * node, tree name,
3717 tree args ATTRIBUTE_UNUSED,
3718 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3720 tree type = *node, result = NULL_TREE;
3721 machine_mode mode;
3722 int unsigned_p;
3724 while (POINTER_TYPE_P (type)
3725 || TREE_CODE (type) == FUNCTION_TYPE
3726 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3727 type = TREE_TYPE (type);
3729 mode = TYPE_MODE (type);
3731 unsigned_p = TYPE_UNSIGNED (type);
3732 switch (mode)
3734 case DImode:
3735 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3736 break;
3737 case SImode:
3738 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3739 break;
3740 case HImode:
3741 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3742 break;
3743 case QImode:
3744 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3745 break;
3746 case SFmode:
3747 result = V4SF_type_node;
3748 break;
3749 case DFmode:
3750 result = V2DF_type_node;
3751 break;
3752 default:
3753 break;
3756 /* Propagate qualifiers attached to the element type
3757 onto the vector type. */
3758 if (result && result != type && TYPE_QUALS (type))
3759 result = build_qualified_type (result, TYPE_QUALS (type));
3761 *no_add_attrs = true; /* No need to hang on to the attribute. */
3763 if (!result)
3764 warning (0, "%qE attribute ignored", name);
3765 else
3766 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3768 return NULL_TREE;
3771 /* Return nonzero if FUNC is a naked function. */
3772 static int
3773 spu_naked_function_p (tree func)
3775 tree a;
3777 if (TREE_CODE (func) != FUNCTION_DECL)
3778 abort ();
3780 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3781 return a != NULL_TREE;
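/* Return the byte offset to add when replacing eliminable register FROM
   by register TO (frame pointer and arg pointer elimination).  The
   offset is built from the local frame size, the outgoing argument
   area, the saved-register area and STACK_POINTER_OFFSET, depending on
   which pair of registers is involved.  */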
3785 spu_initial_elimination_offset (int from, int to)
3787 int saved_regs_size = spu_saved_regs_size ();
3788 int sp_offset = 0;
3789 if (!crtl->is_leaf || crtl->outgoing_args_size
3790 || get_frame_size () || saved_regs_size)
3791 sp_offset = STACK_POINTER_OFFSET;
3792 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3793 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3794 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3795 return get_frame_size ();
3796 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3797 return sp_offset + crtl->outgoing_args_size
3798 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3799 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3800 return get_frame_size () + saved_regs_size + sp_offset;
3801 else
3802 gcc_unreachable ();
3806 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3808 machine_mode mode = TYPE_MODE (type);
3809 int byte_size = ((mode == BLKmode)
3810 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3812 /* Make sure small structs are left justified in a register. */
3813 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3814 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3816 machine_mode smode;
3817 rtvec v;
3818 int i;
3819 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3820 int n = byte_size / UNITS_PER_WORD;
3821 v = rtvec_alloc (nregs);
3822 for (i = 0; i < n; i++)
3824 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3825 gen_rtx_REG (TImode,
3826 FIRST_RETURN_REGNUM
3827 + i),
3828 GEN_INT (UNITS_PER_WORD * i));
3829 byte_size -= UNITS_PER_WORD;
3832 if (n < nregs)
3834 if (byte_size < 4)
3835 byte_size = 4;
3836 smode =
3837 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3838 RTVEC_ELT (v, n) =
3839 gen_rtx_EXPR_LIST (VOIDmode,
3840 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3841 GEN_INT (UNITS_PER_WORD * n));
3843 return gen_rtx_PARALLEL (mode, v);
3845 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3848 static rtx
3849 spu_function_arg (cumulative_args_t cum_v,
3850 machine_mode mode,
3851 const_tree type, bool named ATTRIBUTE_UNUSED)
3853 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3854 int byte_size;
3856 if (*cum >= MAX_REGISTER_ARGS)
3857 return 0;
3859 byte_size = ((mode == BLKmode)
3860 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3862 /* The ABI does not allow parameters to be passed partially in
3863 registers and partially on the stack. */
3864 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3865 return 0;
3867 /* Make sure small structs are left justified in a register. */
3868 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3869 && byte_size < UNITS_PER_WORD && byte_size > 0)
3871 machine_mode smode;
3872 rtx gr_reg;
3873 if (byte_size < 4)
3874 byte_size = 4;
3875 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3876 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3877 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3878 const0_rtx);
3879 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3881 else
3882 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3885 static void
3886 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3887 const_tree type, bool named ATTRIBUTE_UNUSED)
3889 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3891 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3892 ? 1
3893 : mode == BLKmode
3894 ? ((int_size_in_bytes (type) + 15) / 16)
3895 : mode == VOIDmode
3896 ? 1
3897 : HARD_REGNO_NREGS (cum, mode));
3900 /* Variable-sized types are passed by reference. */
3901 static bool
3902 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3903 machine_mode mode ATTRIBUTE_UNUSED,
3904 const_tree type, bool named ATTRIBUTE_UNUSED)
3906 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3910 /* Var args. */
3912 /* Create and return the va_list datatype.
3914 On SPU, va_list is an array type equivalent to
3916 typedef struct __va_list_tag
3918 void *__args __attribute__((__aligned(16)));
3919 void *__skip __attribute__((__aligned(16)));
3921 } va_list[1];
3923 where __args points to the arg that will be returned by the next
3924 va_arg(), and __skip points to the previous stack frame such that
3925 when __args == __skip we should advance __args by 32 bytes. */
3926 static tree
3927 spu_build_builtin_va_list (void)
3929 tree f_args, f_skip, record, type_decl;
3930 bool owp;
3932 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3934 type_decl =
3935 build_decl (BUILTINS_LOCATION,
3936 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3938 f_args = build_decl (BUILTINS_LOCATION,
3939 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3940 f_skip = build_decl (BUILTINS_LOCATION,
3941 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3943 DECL_FIELD_CONTEXT (f_args) = record;
3944 DECL_ALIGN (f_args) = 128;
3945 DECL_USER_ALIGN (f_args) = 1;
3947 DECL_FIELD_CONTEXT (f_skip) = record;
3948 DECL_ALIGN (f_skip) = 128;
3949 DECL_USER_ALIGN (f_skip) = 1;
3951 TYPE_STUB_DECL (record) = type_decl;
3952 TYPE_NAME (record) = type_decl;
3953 TYPE_FIELDS (record) = f_args;
3954 DECL_CHAIN (f_args) = f_skip;
3956 /* We know this is being padded, and that is what we want. It is an
3957 internal type so hide the warnings from the user. */
3958 owp = warn_padded;
3959 warn_padded = false;
3961 layout_type (record);
3963 warn_padded = owp;
3965 /* The correct type is an array type of one element. */
3966 return build_array_type (record, build_index_type (size_zero_node));
3969 /* Implement va_start by filling the va_list structure VALIST.
3970 NEXTARG points to the first anonymous stack argument.
3972 The following global variables are used to initialize
3973 the va_list structure:
3975 crtl->args.info;
3976 the CUMULATIVE_ARGS for this function
3978 crtl->args.arg_offset_rtx:
3979 holds the offset of the first anonymous stack argument
3980 (relative to the virtual arg pointer). */
3982 static void
3983 spu_va_start (tree valist, rtx nextarg)
3985 tree f_args, f_skip;
3986 tree args, skip, t;
3988 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3989 f_skip = DECL_CHAIN (f_args);
3991 valist = build_simple_mem_ref (valist);
3992 args =
3993 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3994 skip =
3995 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3997 /* Find the __args area. */
3998 t = make_tree (TREE_TYPE (args), nextarg);
3999 if (crtl->args.pretend_args_size > 0)
4000 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4001 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4002 TREE_SIDE_EFFECTS (t) = 1;
4003 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4005 /* Find the __skip area. */
4006 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4007 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4008 - STACK_POINTER_OFFSET));
4009 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4010 TREE_SIDE_EFFECTS (t) = 1;
4011 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4014 /* Gimplify va_arg by updating the va_list structure
4015 VALIST as required to retrieve an argument of type
4016 TYPE, and returning that argument.
4018 ret = va_arg(VALIST, TYPE);
4020 generates code equivalent to:
4022 paddedsize = (sizeof(TYPE) + 15) & -16;
4023 if (VALIST.__args + paddedsize > VALIST.__skip
4024 && VALIST.__args <= VALIST.__skip)
4025 addr = VALIST.__skip + 32;
4026 else
4027 addr = VALIST.__args;
4028 VALIST.__args = addr + paddedsize;
4029 ret = *(TYPE *)addr;
4030 */
4031 static tree
4032 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4033 gimple_seq * post_p ATTRIBUTE_UNUSED)
4035 tree f_args, f_skip;
4036 tree args, skip;
4037 HOST_WIDE_INT size, rsize;
4038 tree addr, tmp;
4039 bool pass_by_reference_p;
4041 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4042 f_skip = DECL_CHAIN (f_args);
4044 args =
4045 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4046 skip =
4047 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4049 addr = create_tmp_var (ptr_type_node, "va_arg");
4051 /* If an object is dynamically sized, a pointer to it is passed
4052 instead of the object itself. */
4053 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4054 false);
4055 if (pass_by_reference_p)
4056 type = build_pointer_type (type);
4057 size = int_size_in_bytes (type);
4058 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4060 /* build conditional expression to calculate addr. The expression
4061 will be gimplified later. */
4062 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4063 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4064 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4065 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4066 unshare_expr (skip)));
4068 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4069 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4070 unshare_expr (args));
4072 gimplify_assign (addr, tmp, pre_p);
4074 /* update VALIST.__args */
4075 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4076 gimplify_assign (unshare_expr (args), tmp, pre_p);
4078 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4079 addr);
4081 if (pass_by_reference_p)
4082 addr = build_va_arg_indirect_ref (addr);
4084 return build_va_arg_indirect_ref (addr);
4087 /* Save parameter registers starting with the register that corresponds
4088 to the first unnamed parameter. If the first unnamed parameter is
4089 on the stack then save no registers. Set pretend_args_size to the
4090 amount of space needed to save the registers. */
4091 static void
4092 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4093 tree type, int *pretend_size, int no_rtl)
4095 if (!no_rtl)
4097 rtx tmp;
4098 int regno;
4099 int offset;
4100 int ncum = *get_cumulative_args (cum);
4102 /* cum currently points to the last named argument, we want to
4103 start at the next argument. */
4104 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4106 offset = -STACK_POINTER_OFFSET;
4107 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4109 tmp = gen_frame_mem (V4SImode,
4110 plus_constant (Pmode, virtual_incoming_args_rtx,
4111 offset));
4112 emit_move_insn (tmp,
4113 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4114 offset += 16;
4116 *pretend_size = offset + STACK_POINTER_OFFSET;
4120 static void
4121 spu_conditional_register_usage (void)
4123 if (flag_pic)
4125 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4126 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4130 /* This is called any time we inspect the alignment of a register for
4131 addresses. */
4132 static int
4133 reg_aligned_for_addr (rtx x)
4135 int regno =
4136 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4137 return REGNO_POINTER_ALIGN (regno) >= 128;
4140 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4141 into its SYMBOL_REF_FLAGS. */
4142 static void
4143 spu_encode_section_info (tree decl, rtx rtl, int first)
4145 default_encode_section_info (decl, rtl, first);
4147 /* If a variable has a forced alignment to < 16 bytes, mark it with
4148 SYMBOL_FLAG_ALIGN1. */
4149 if (TREE_CODE (decl) == VAR_DECL
4150 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4151 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4154 /* Return TRUE if we are certain the mem refers to a complete object
4155 which is both 16-byte aligned and padded to a 16-byte boundary. This
4156 would make it safe to store with a single instruction.
4157 We guarantee the alignment and padding for static objects by aligning
4158 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4159 FIXME: We currently cannot guarantee this for objects on the stack
4160 because assign_parm_setup_stack calls assign_stack_local with the
4161 alignment of the parameter mode and in that case the alignment never
4162 gets adjusted by LOCAL_ALIGNMENT. */
4163 static int
4164 store_with_one_insn_p (rtx mem)
4166 machine_mode mode = GET_MODE (mem);
4167 rtx addr = XEXP (mem, 0);
4168 if (mode == BLKmode)
4169 return 0;
4170 if (GET_MODE_SIZE (mode) >= 16)
4171 return 1;
4172 /* Only static objects. */
4173 if (GET_CODE (addr) == SYMBOL_REF)
4175 /* We use the associated declaration to make sure the access is
4176 referring to the whole object.
4177 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4178 if it is necessary. Will there be cases where one exists, and
4179 the other does not? Will there be cases where both exist, but
4180 have different types? */
4181 tree decl = MEM_EXPR (mem);
4182 if (decl
4183 && TREE_CODE (decl) == VAR_DECL
4184 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4185 return 1;
4186 decl = SYMBOL_REF_DECL (addr);
4187 if (decl
4188 && TREE_CODE (decl) == VAR_DECL
4189 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4190 return 1;
4192 return 0;
4195 /* Return 1 when the address is not valid for a simple load and store as
4196 required by the '_mov*' patterns. We could make this less strict
4197 for loads, but we prefer MEMs to look the same so they are more
4198 likely to be merged. */
4199 static int
4200 address_needs_split (rtx mem)
4202 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4203 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4204 || !(store_with_one_insn_p (mem)
4205 || mem_is_padded_component_ref (mem))))
4206 return 1;
4208 return 0;
4211 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4212 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4213 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4215 /* MEM is known to be an __ea qualified memory access. Emit a call to
4216 fetch the PPU memory to local store, and return its address in local
4217 store. */
4219 static void
4220 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4222 if (is_store)
4224 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4225 if (!cache_fetch_dirty)
4226 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4227 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4228 2, ea_addr, EAmode, ndirty, SImode);
4230 else
4232 if (!cache_fetch)
4233 cache_fetch = init_one_libfunc ("__cache_fetch");
4234 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4235 1, ea_addr, EAmode);
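/* In effect this emits, roughly,
       data_addr = __cache_fetch (ea_addr);
   for loads and
       data_addr = __cache_fetch_dirty (ea_addr, GET_MODE_SIZE (GET_MODE (mem)));
   for stores, leaving it to the cache runtime to bring the data into
   local store.  */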
4239 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4240 dirty bit marking, inline.
4242 The cache control data structure is an array of
4244 struct __cache_tag_array
4246 unsigned int tag_lo[4];
4247 unsigned int tag_hi[4];
4248 void *data_pointer[4];
4249 int reserved[4];
4250 vector unsigned short dirty_bits[4];
4251 } */
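/* Illustrative C for the lookup emitted below (a sketch, not the actual
   RTL; the field walk is done with explicit offsets of 0, 16 and 32):
       set = (struct __cache_tag_array *)
             ((char *) __cache_tag_array + (ea & (__cache_tag_array_size - 128)));
       if (set->tag_lo[s] == (ea & -128) for some slot s
           (and set->tag_hi[s] also matches for a 64-bit __ea))
         data_addr = (char *) set->data_pointer[s] + (ea & 127);
       else
         fall back to the __cache_fetch* call above.  */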
4253 static void
4254 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4256 rtx ea_addr_si;
4257 HOST_WIDE_INT v;
4258 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4259 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4260 rtx index_mask = gen_reg_rtx (SImode);
4261 rtx tag_arr = gen_reg_rtx (Pmode);
4262 rtx splat_mask = gen_reg_rtx (TImode);
4263 rtx splat = gen_reg_rtx (V4SImode);
4264 rtx splat_hi = NULL_RTX;
4265 rtx tag_index = gen_reg_rtx (Pmode);
4266 rtx block_off = gen_reg_rtx (SImode);
4267 rtx tag_addr = gen_reg_rtx (Pmode);
4268 rtx tag = gen_reg_rtx (V4SImode);
4269 rtx cache_tag = gen_reg_rtx (V4SImode);
4270 rtx cache_tag_hi = NULL_RTX;
4271 rtx cache_ptrs = gen_reg_rtx (TImode);
4272 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4273 rtx tag_equal = gen_reg_rtx (V4SImode);
4274 rtx tag_equal_hi = NULL_RTX;
4275 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4276 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4277 rtx eq_index = gen_reg_rtx (SImode);
4278 rtx bcomp, hit_label, hit_ref, cont_label;
4279 rtx_insn *insn;
4281 if (spu_ea_model != 32)
4283 splat_hi = gen_reg_rtx (V4SImode);
4284 cache_tag_hi = gen_reg_rtx (V4SImode);
4285 tag_equal_hi = gen_reg_rtx (V4SImode);
4288 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4289 emit_move_insn (tag_arr, tag_arr_sym);
4290 v = 0x0001020300010203LL;
4291 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4292 ea_addr_si = ea_addr;
4293 if (spu_ea_model != 32)
4294 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4296 /* tag_index = ea_addr & (tag_array_size - 128) */
4297 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4299 /* splat ea_addr to all 4 slots. */
4300 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4301 /* Similarly for high 32 bits of ea_addr. */
4302 if (spu_ea_model != 32)
4303 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4305 /* block_off = ea_addr & 127 */
4306 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4308 /* tag_addr = tag_arr + tag_index */
4309 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4311 /* Read cache tags. */
4312 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4313 if (spu_ea_model != 32)
4314 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4315 plus_constant (Pmode,
4316 tag_addr, 16)));
4318 /* tag = ea_addr & -128 */
4319 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4321 /* Read all four cache data pointers. */
4322 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4323 plus_constant (Pmode,
4324 tag_addr, 32)));
4326 /* Compare tags. */
4327 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4328 if (spu_ea_model != 32)
4330 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4331 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4334 /* At most one of the tags compare equal, so tag_equal has one
4335 32-bit slot set to all 1's, with the other slots all zero.
4336 gbb picks off low bit from each byte in the 128-bit registers,
4337 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4338 we have a hit. */
4339 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4340 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4342 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4343 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4345 /* That allows us to rotate the corresponding cache data pointer to
4346 slot 0 (the rotate amount is eq_index mod 16 bytes). */
4347 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4348 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4350 /* Add block offset to form final data address. */
4351 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4353 /* Check that we did hit. */
4354 hit_label = gen_label_rtx ();
4355 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4356 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4357 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4358 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4359 hit_ref, pc_rtx)));
4360 /* Say that this branch is very likely to happen. */
4361 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4362 add_int_reg_note (insn, REG_BR_PROB, v);
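/* (REG_BR_PROB_BASE is 10000, so v is 9899: the hit path is predicted to
   be taken about 99% of the time.)  */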
4364 ea_load_store (mem, is_store, ea_addr, data_addr);
4365 cont_label = gen_label_rtx ();
4366 emit_jump_insn (gen_jump (cont_label));
4367 emit_barrier ();
4369 emit_label (hit_label);
4371 if (is_store)
4373 HOST_WIDE_INT v_hi;
4374 rtx dirty_bits = gen_reg_rtx (TImode);
4375 rtx dirty_off = gen_reg_rtx (SImode);
4376 rtx dirty_128 = gen_reg_rtx (TImode);
4377 rtx neg_block_off = gen_reg_rtx (SImode);
4379 /* Set up mask with one dirty bit per byte of the mem we are
4380 writing, starting from top bit. */
4381 v_hi = v = -1;
4382 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4383 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4385 v_hi = v;
4386 v = 0;
4388 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4390 /* Form index into cache dirty_bits. eq_index is one of
4391 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4392 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4393 offset to each of the four dirty_bits elements. */
4394 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4396 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4398 /* Rotate bit mask to proper bit. */
4399 emit_insn (gen_negsi2 (neg_block_off, block_off));
4400 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4401 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4403 /* Or in the new dirty bits. */
4404 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4406 /* Store. */
4407 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4410 emit_label (cont_label);
4413 static rtx
4414 expand_ea_mem (rtx mem, bool is_store)
4416 rtx ea_addr;
4417 rtx data_addr = gen_reg_rtx (Pmode);
4418 rtx new_mem;
4420 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4421 if (optimize_size || optimize == 0)
4422 ea_load_store (mem, is_store, ea_addr, data_addr);
4423 else
4424 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4426 if (ea_alias_set == -1)
4427 ea_alias_set = new_alias_set ();
4429 /* We generate a new MEM RTX to refer to the copy of the data
4430 in the cache. We do not copy memory attributes (except the
4431 alignment) from the original MEM, as they may no longer apply
4432 to the cache copy. */
4433 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4434 set_mem_alias_set (new_mem, ea_alias_set);
4435 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4437 return new_mem;
4441 spu_expand_mov (rtx * ops, machine_mode mode)
4443 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4445 /* Perform the move in the destination SUBREG's inner mode. */
4446 ops[0] = SUBREG_REG (ops[0]);
4447 mode = GET_MODE (ops[0]);
4448 ops[1] = gen_lowpart_common (mode, ops[1]);
4449 gcc_assert (ops[1]);
4452 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4454 rtx from = SUBREG_REG (ops[1]);
4455 machine_mode imode = int_mode_for_mode (GET_MODE (from));
4457 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4458 && GET_MODE_CLASS (imode) == MODE_INT
4459 && subreg_lowpart_p (ops[1]));
4461 if (GET_MODE_SIZE (imode) < 4)
4462 imode = SImode;
4463 if (imode != GET_MODE (from))
4464 from = gen_rtx_SUBREG (imode, from, 0);
4466 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4468 enum insn_code icode = convert_optab_handler (trunc_optab,
4469 mode, imode);
4470 emit_insn (GEN_FCN (icode) (ops[0], from));
4472 else
4473 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4474 return 1;
4477 /* At least one of the operands needs to be a register. */
4478 if ((reload_in_progress | reload_completed) == 0
4479 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4481 rtx temp = force_reg (mode, ops[1]);
4482 emit_move_insn (ops[0], temp);
4483 return 1;
4485 if (reload_in_progress || reload_completed)
4487 if (CONSTANT_P (ops[1]))
4488 return spu_split_immediate (ops);
4489 return 0;
4492 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4493 extend them. */
4494 if (GET_CODE (ops[1]) == CONST_INT)
4496 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4497 if (val != INTVAL (ops[1]))
4499 emit_move_insn (ops[0], GEN_INT (val));
4500 return 1;
4503 if (MEM_P (ops[0]))
4505 if (MEM_ADDR_SPACE (ops[0]))
4506 ops[0] = expand_ea_mem (ops[0], true);
4507 return spu_split_store (ops);
4509 if (MEM_P (ops[1]))
4511 if (MEM_ADDR_SPACE (ops[1]))
4512 ops[1] = expand_ea_mem (ops[1], false);
4513 return spu_split_load (ops);
4516 return 0;
4519 static void
4520 spu_convert_move (rtx dst, rtx src)
4522 machine_mode mode = GET_MODE (dst);
4523 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4524 rtx reg;
4525 gcc_assert (GET_MODE (src) == TImode);
4526 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
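/* A scalar occupies the preferred slot, i.e. the first word (bytes 0-3)
   of the quadword, with DImode using bytes 0-7.  Shifting the TImode
   source right by 96 (or 64 for DImode) moves those bytes to the low end
   so the TRUNCATE below can pick them up.  */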
4527 emit_insn (gen_rtx_SET (reg,
4528 gen_rtx_TRUNCATE (int_mode,
4529 gen_rtx_LSHIFTRT (TImode, src,
4530 GEN_INT (int_mode == DImode ? 64 : 96)))));
4531 if (int_mode != mode)
4533 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4534 emit_move_insn (dst, reg);
4538 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4539 the address from SRC and SRC+16. Return a REG or CONST_INT that
4540 specifies how many bytes to rotate the loaded registers, plus any
4541 extra from EXTRA_ROTQBY. The address and rotate amounts are
4542 normalized to improve merging of loads and rotate computations. */
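/* For an unaligned scalar this typically boils down to an lqd/lqx of the
   enclosing quadword followed by a rotqby using the returned rotate
   amount (see spu_split_load below); the 8-case comment below enumerates
   the address shapes that are handled.  */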
4543 static rtx
4544 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4546 rtx addr = XEXP (src, 0);
4547 rtx p0, p1, rot, addr0, addr1;
4548 int rot_amt;
4550 rot = 0;
4551 rot_amt = 0;
4553 if (MEM_ALIGN (src) >= 128)
4554 /* Address is already aligned; simply perform a TImode load. */ ;
4555 else if (GET_CODE (addr) == PLUS)
4557 /* 8 cases:
4558 aligned reg + aligned reg => lqx
4559 aligned reg + unaligned reg => lqx, rotqby
4560 aligned reg + aligned const => lqd
4561 aligned reg + unaligned const => lqd, rotqbyi
4562 unaligned reg + aligned reg => lqx, rotqby
4563 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4564 unaligned reg + aligned const => lqd, rotqby
4565 unaligned reg + unaligned const => not allowed by legitimate address
4567 p0 = XEXP (addr, 0);
4568 p1 = XEXP (addr, 1);
4569 if (!reg_aligned_for_addr (p0))
4571 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4573 rot = gen_reg_rtx (SImode);
4574 emit_insn (gen_addsi3 (rot, p0, p1));
4576 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4578 if (INTVAL (p1) > 0
4579 && REG_POINTER (p0)
4580 && INTVAL (p1) * BITS_PER_UNIT
4581 < REGNO_POINTER_ALIGN (REGNO (p0)))
4583 rot = gen_reg_rtx (SImode);
4584 emit_insn (gen_addsi3 (rot, p0, p1));
4585 addr = p0;
4587 else
4589 rtx x = gen_reg_rtx (SImode);
4590 emit_move_insn (x, p1);
4591 if (!spu_arith_operand (p1, SImode))
4592 p1 = x;
4593 rot = gen_reg_rtx (SImode);
4594 emit_insn (gen_addsi3 (rot, p0, p1));
4595 addr = gen_rtx_PLUS (Pmode, p0, x);
4598 else
4599 rot = p0;
4601 else
4603 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4605 rot_amt = INTVAL (p1) & 15;
4606 if (INTVAL (p1) & -16)
4608 p1 = GEN_INT (INTVAL (p1) & -16);
4609 addr = gen_rtx_PLUS (SImode, p0, p1);
4611 else
4612 addr = p0;
4614 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4615 rot = p1;
4618 else if (REG_P (addr))
4620 if (!reg_aligned_for_addr (addr))
4621 rot = addr;
4623 else if (GET_CODE (addr) == CONST)
4625 if (GET_CODE (XEXP (addr, 0)) == PLUS
4626 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4627 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4629 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4630 if (rot_amt & -16)
4631 addr = gen_rtx_CONST (Pmode,
4632 gen_rtx_PLUS (Pmode,
4633 XEXP (XEXP (addr, 0), 0),
4634 GEN_INT (rot_amt & -16)));
4635 else
4636 addr = XEXP (XEXP (addr, 0), 0);
4638 else
4640 rot = gen_reg_rtx (Pmode);
4641 emit_move_insn (rot, addr);
4644 else if (GET_CODE (addr) == CONST_INT)
4646 rot_amt = INTVAL (addr);
4647 addr = GEN_INT (rot_amt & -16);
4649 else if (!ALIGNED_SYMBOL_REF_P (addr))
4651 rot = gen_reg_rtx (Pmode);
4652 emit_move_insn (rot, addr);
4655 rot_amt += extra_rotby;
4657 rot_amt &= 15;
4659 if (rot && rot_amt)
4661 rtx x = gen_reg_rtx (SImode);
4662 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4663 rot = x;
4664 rot_amt = 0;
4666 if (!rot && rot_amt)
4667 rot = GEN_INT (rot_amt);
4669 addr0 = copy_rtx (addr);
4670 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4671 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4673 if (dst1)
4675 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4676 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4677 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4680 return rot;
4684 spu_split_load (rtx * ops)
4686 machine_mode mode = GET_MODE (ops[0]);
4687 rtx addr, load, rot;
4688 int rot_amt;
4690 if (GET_MODE_SIZE (mode) >= 16)
4691 return 0;
4693 addr = XEXP (ops[1], 0);
4694 gcc_assert (GET_CODE (addr) != AND);
4696 if (!address_needs_split (ops[1]))
4698 ops[1] = change_address (ops[1], TImode, addr);
4699 load = gen_reg_rtx (TImode);
4700 emit_insn (gen__movti (load, ops[1]));
4701 spu_convert_move (ops[0], load);
4702 return 1;
4705 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
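/* The extra rotate requested above accounts for sub-word scalars being
   right-justified in the preferred slot: for QImode the addressed byte
   must end up at byte 3 of the first word, hence the (size - 4)
   adjustment (taken modulo 16 inside spu_expand_load).  */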
4707 load = gen_reg_rtx (TImode);
4708 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4710 if (rot)
4711 emit_insn (gen_rotqby_ti (load, load, rot));
4713 spu_convert_move (ops[0], load);
4714 return 1;
4718 spu_split_store (rtx * ops)
4720 machine_mode mode = GET_MODE (ops[0]);
4721 rtx reg;
4722 rtx addr, p0, p1, p1_lo, smem;
4723 int aform;
4724 int scalar;
4726 if (GET_MODE_SIZE (mode) >= 16)
4727 return 0;
4729 addr = XEXP (ops[0], 0);
4730 gcc_assert (GET_CODE (addr) != AND);
4732 if (!address_needs_split (ops[0]))
4734 reg = gen_reg_rtx (TImode);
4735 emit_insn (gen_spu_convert (reg, ops[1]));
4736 ops[0] = change_address (ops[0], TImode, addr);
4737 emit_move_insn (ops[0], reg);
4738 return 1;
4741 if (GET_CODE (addr) == PLUS)
4743 /* 8 cases:
4744 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4745 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4746 aligned reg + aligned const => lqd, c?d, shuf, stqx
4747 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4748 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4749 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4750 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4751 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4753 aform = 0;
4754 p0 = XEXP (addr, 0);
4755 p1 = p1_lo = XEXP (addr, 1);
4756 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4758 p1_lo = GEN_INT (INTVAL (p1) & 15);
4759 if (reg_aligned_for_addr (p0))
4761 p1 = GEN_INT (INTVAL (p1) & -16);
4762 if (p1 == const0_rtx)
4763 addr = p0;
4764 else
4765 addr = gen_rtx_PLUS (SImode, p0, p1);
4767 else
4769 rtx x = gen_reg_rtx (SImode);
4770 emit_move_insn (x, p1);
4771 addr = gen_rtx_PLUS (SImode, p0, x);
4775 else if (REG_P (addr))
4777 aform = 0;
4778 p0 = addr;
4779 p1 = p1_lo = const0_rtx;
4781 else
4783 aform = 1;
4784 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4785 p1 = 0; /* aform doesn't use p1 */
4786 p1_lo = addr;
4787 if (ALIGNED_SYMBOL_REF_P (addr))
4788 p1_lo = const0_rtx;
4789 else if (GET_CODE (addr) == CONST
4790 && GET_CODE (XEXP (addr, 0)) == PLUS
4791 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4792 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4794 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4795 if ((v & -16) != 0)
4796 addr = gen_rtx_CONST (Pmode,
4797 gen_rtx_PLUS (Pmode,
4798 XEXP (XEXP (addr, 0), 0),
4799 GEN_INT (v & -16)));
4800 else
4801 addr = XEXP (XEXP (addr, 0), 0);
4802 p1_lo = GEN_INT (v & 15);
4804 else if (GET_CODE (addr) == CONST_INT)
4806 p1_lo = GEN_INT (INTVAL (addr) & 15);
4807 addr = GEN_INT (INTVAL (addr) & -16);
4809 else
4811 p1_lo = gen_reg_rtx (SImode);
4812 emit_move_insn (p1_lo, addr);
4816 gcc_assert (aform == 0 || aform == 1);
4817 reg = gen_reg_rtx (TImode);
4819 scalar = store_with_one_insn_p (ops[0]);
4820 if (!scalar)
4822 /* We could copy the flags from the ops[0] MEM to lmem below.
4823 We don't because we want this load to be optimized away if
4824 possible, and copying the flags will prevent that in certain
4825 cases, e.g. consider the volatile flag. */
4827 rtx pat = gen_reg_rtx (TImode);
4828 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4829 set_mem_alias_set (lmem, 0);
4830 emit_insn (gen_movti (reg, lmem));
4832 if (!p0 || reg_aligned_for_addr (p0))
4833 p0 = stack_pointer_rtx;
4834 if (!p1_lo)
4835 p1_lo = const0_rtx;
4837 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4838 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4840 else
4842 if (GET_CODE (ops[1]) == REG)
4843 emit_insn (gen_spu_convert (reg, ops[1]));
4844 else if (GET_CODE (ops[1]) == SUBREG)
4845 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4846 else
4847 abort ();
4850 if (GET_MODE_SIZE (mode) < 4 && scalar)
4851 emit_insn (gen_ashlti3
4852 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4854 smem = change_address (ops[0], TImode, copy_rtx (addr));
4855 /* We can't use the previous alias set because the memory has changed
4856 size and can potentially overlap objects of other types. */
4857 set_mem_alias_set (smem, 0);
4859 emit_insn (gen_movti (smem, reg));
4860 return 1;
4863 /* Return TRUE if X is MEM which is a struct member reference
4864 and the member can safely be loaded and stored with a single
4865 instruction because it is padded. */
4866 static int
4867 mem_is_padded_component_ref (rtx x)
4869 tree t = MEM_EXPR (x);
4870 tree r;
4871 if (!t || TREE_CODE (t) != COMPONENT_REF)
4872 return 0;
4873 t = TREE_OPERAND (t, 1);
4874 if (!t || TREE_CODE (t) != FIELD_DECL
4875 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4876 return 0;
4877 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4878 r = DECL_FIELD_CONTEXT (t);
4879 if (!r || TREE_CODE (r) != RECORD_TYPE)
4880 return 0;
4881 /* Make sure they are the same mode */
4882 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4883 return 0;
4884 /* If there are no following fields then the field alignment assures
4885 the structure is padded to the alignment which means this field is
4886 padded too. */
4887 if (TREE_CHAIN (t) == 0)
4888 return 1;
4889 /* If the following field is also aligned then this field will be
4890 padded. */
4891 t = TREE_CHAIN (t);
4892 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4893 return 1;
4894 return 0;
4897 /* Parse the -mfixed-range= option string. */
4898 static void
4899 fix_range (const char *const_str)
4901 int i, first, last;
4902 char *str, *dash, *comma;
4904 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4905 REG2 are either register names or register numbers. The effect
4906 of this option is to mark the registers in the range from REG1 to
4907 REG2 as ``fixed'' so they won't be used by the compiler. */
4909 i = strlen (const_str);
4910 str = (char *) alloca (i + 1);
4911 memcpy (str, const_str, i + 1);
4913 while (1)
4915 dash = strchr (str, '-');
4916 if (!dash)
4918 warning (0, "value of -mfixed-range must have form REG1-REG2");
4919 return;
4921 *dash = '\0';
4922 comma = strchr (dash + 1, ',');
4923 if (comma)
4924 *comma = '\0';
4926 first = decode_reg_name (str);
4927 if (first < 0)
4929 warning (0, "unknown register name: %s", str);
4930 return;
4933 last = decode_reg_name (dash + 1);
4934 if (last < 0)
4936 warning (0, "unknown register name: %s", dash + 1);
4937 return;
4940 *dash = '-';
4942 if (first > last)
4944 warning (0, "%s-%s is an empty range", str, dash + 1);
4945 return;
4948 for (i = first; i <= last; ++i)
4949 fixed_regs[i] = call_used_regs[i] = 1;
4951 if (!comma)
4952 break;
4954 *comma = ',';
4955 str = comma + 1;
4959 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4960 can be generated using the fsmbi instruction. */
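/* (fsmbi expands a 16-bit immediate into a quadword in which every byte
   is either 0x00 or 0xff, one byte per immediate bit, so roughly speaking
   a constant classifies as IC_FSMBI when each byte of its 16-byte image
   is 0x00 or 0xff.  IC_FSMBI2 presumably still needs a later split into a
   second instruction, hence the !epilogue_completed check below.)  */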
4962 fsmbi_const_p (rtx x)
4964 if (CONSTANT_P (x))
4966 /* We can always choose TImode for CONST_INT because the high bits
4967 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4968 enum immediate_class c = classify_immediate (x, TImode);
4969 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4971 return 0;
4974 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4975 can be generated using the cbd, chd, cwd or cdd instruction. */
4977 cpat_const_p (rtx x, machine_mode mode)
4979 if (CONSTANT_P (x))
4981 enum immediate_class c = classify_immediate (x, mode);
4982 return c == IC_CPAT;
4984 return 0;
4988 gen_cpat_const (rtx * ops)
4990 unsigned char dst[16];
4991 int i, offset, shift, isize;
4992 if (GET_CODE (ops[3]) != CONST_INT
4993 || GET_CODE (ops[2]) != CONST_INT
4994 || (GET_CODE (ops[1]) != CONST_INT
4995 && GET_CODE (ops[1]) != REG))
4996 return 0;
4997 if (GET_CODE (ops[1]) == REG
4998 && (!REG_POINTER (ops[1])
4999 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5000 return 0;
5002 for (i = 0; i < 16; i++)
5003 dst[i] = i + 16;
5004 isize = INTVAL (ops[3]);
5005 if (isize == 1)
5006 shift = 3;
5007 else if (isize == 2)
5008 shift = 2;
5009 else
5010 shift = 0;
5011 offset = (INTVAL (ops[2]) +
5012 (GET_CODE (ops[1]) ==
5013 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5014 for (i = 0; i < isize; i++)
5015 dst[offset + i] = i + shift;
5016 return array_to_constant (TImode, dst);
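/* Worked example for gen_cpat_const above: inserting a 4-byte value at
   offset 4 yields the control bytes
       { 16,17,18,19,  0,1,2,3,  24,25,26,27,28,29,30,31 }
   so a shufb with (new value, old quadword) as sources keeps the old data
   everywhere except bytes 4-7, which come from the new value's preferred
   slot.  */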
5019 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5020 array. Use MODE for CONST_INTs. When the constant's mode is smaller
5021 than 16 bytes, the value is repeated across the rest of the array. */
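/* For example, SImode 0x01020304 produces
       01 02 03 04 01 02 03 04 01 02 03 04 01 02 03 04
   and an 8-byte DImode/DFmode value fills bytes 0-7 and is repeated in
   bytes 8-15.  */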
5022 void
5023 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5025 HOST_WIDE_INT val;
5026 int i, j, first;
5028 memset (arr, 0, 16);
5029 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5030 if (GET_CODE (x) == CONST_INT
5031 || (GET_CODE (x) == CONST_DOUBLE
5032 && (mode == SFmode || mode == DFmode)))
5034 gcc_assert (mode != VOIDmode && mode != BLKmode);
5036 if (GET_CODE (x) == CONST_DOUBLE)
5037 val = const_double_to_hwint (x);
5038 else
5039 val = INTVAL (x);
5040 first = GET_MODE_SIZE (mode) - 1;
5041 for (i = first; i >= 0; i--)
5043 arr[i] = val & 0xff;
5044 val >>= 8;
5046 /* Splat the constant across the whole array. */
5047 for (j = 0, i = first + 1; i < 16; i++)
5049 arr[i] = arr[j];
5050 j = (j == first) ? 0 : j + 1;
5053 else if (GET_CODE (x) == CONST_DOUBLE)
5055 val = CONST_DOUBLE_LOW (x);
5056 for (i = 15; i >= 8; i--)
5058 arr[i] = val & 0xff;
5059 val >>= 8;
5061 val = CONST_DOUBLE_HIGH (x);
5062 for (i = 7; i >= 0; i--)
5064 arr[i] = val & 0xff;
5065 val >>= 8;
5068 else if (GET_CODE (x) == CONST_VECTOR)
5070 int units;
5071 rtx elt;
5072 mode = GET_MODE_INNER (mode);
5073 units = CONST_VECTOR_NUNITS (x);
5074 for (i = 0; i < units; i++)
5076 elt = CONST_VECTOR_ELT (x, i);
5077 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5079 if (GET_CODE (elt) == CONST_DOUBLE)
5080 val = const_double_to_hwint (elt);
5081 else
5082 val = INTVAL (elt);
5083 first = GET_MODE_SIZE (mode) - 1;
5084 if (first + i * GET_MODE_SIZE (mode) > 16)
5085 abort ();
5086 for (j = first; j >= 0; j--)
5088 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5089 val >>= 8;
5094 else
5095 gcc_unreachable();
5098 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5099 smaller than 16 bytes, use the bytes that would represent that value
5100 in a register, e.g., for QImode return the value of arr[3]. */
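/* For example, for SImode the result is
       (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3]
   sign-extended via trunc_int_for_mode.  */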
5102 array_to_constant (machine_mode mode, const unsigned char arr[16])
5104 machine_mode inner_mode;
5105 rtvec v;
5106 int units, size, i, j, k;
5107 HOST_WIDE_INT val;
5109 if (GET_MODE_CLASS (mode) == MODE_INT
5110 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5112 j = GET_MODE_SIZE (mode);
5113 i = j < 4 ? 4 - j : 0;
5114 for (val = 0; i < j; i++)
5115 val = (val << 8) | arr[i];
5116 val = trunc_int_for_mode (val, mode);
5117 return GEN_INT (val);
5120 if (mode == TImode)
5122 HOST_WIDE_INT high;
5123 for (i = high = 0; i < 8; i++)
5124 high = (high << 8) | arr[i];
5125 for (i = 8, val = 0; i < 16; i++)
5126 val = (val << 8) | arr[i];
5127 return immed_double_const (val, high, TImode);
5129 if (mode == SFmode)
5131 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5132 val = trunc_int_for_mode (val, SImode);
5133 return hwint_to_const_double (SFmode, val);
5135 if (mode == DFmode)
5137 for (i = 0, val = 0; i < 8; i++)
5138 val = (val << 8) | arr[i];
5139 return hwint_to_const_double (DFmode, val);
5142 if (!VECTOR_MODE_P (mode))
5143 abort ();
5145 units = GET_MODE_NUNITS (mode);
5146 size = GET_MODE_UNIT_SIZE (mode);
5147 inner_mode = GET_MODE_INNER (mode);
5148 v = rtvec_alloc (units);
5150 for (k = i = 0; i < units; ++i)
5152 val = 0;
5153 for (j = 0; j < size; j++, k++)
5154 val = (val << 8) | arr[k];
5156 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5157 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5158 else
5159 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5161 if (k > 16)
5162 abort ();
5164 return gen_rtx_CONST_VECTOR (mode, v);
5167 static void
5168 reloc_diagnostic (rtx x)
5170 tree decl = 0;
5171 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5172 return;
5174 if (GET_CODE (x) == SYMBOL_REF)
5175 decl = SYMBOL_REF_DECL (x);
5176 else if (GET_CODE (x) == CONST
5177 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5178 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5180 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5181 if (decl && !DECL_P (decl))
5182 decl = 0;
5184 /* The decl could be a string constant. */
5185 if (decl && DECL_P (decl))
5187 location_t loc;
5188 /* We use last_assemble_variable_decl to get line information. It's
5189 not always going to be right and might not even be close, but will
5190 be right for the more common cases. */
5191 if (!last_assemble_variable_decl || in_section == ctors_section)
5192 loc = DECL_SOURCE_LOCATION (decl);
5193 else
5194 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5196 if (TARGET_WARN_RELOC)
5197 warning_at (loc, 0,
5198 "creating run-time relocation for %qD", decl);
5199 else
5200 error_at (loc,
5201 "creating run-time relocation for %qD", decl);
5203 else
5205 if (TARGET_WARN_RELOC)
5206 warning_at (input_location, 0, "creating run-time relocation");
5207 else
5208 error_at (input_location, "creating run-time relocation");
5212 /* Hook into assemble_integer so we can generate an error for run-time
5213 relocations. The SPU ABI disallows them. */
5214 static bool
5215 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5217 /* By default run-time relocations aren't supported, but we allow them
5218 in case users support them in their own run-time loader. And we provide
5219 a warning for those users that don't. */
5220 if ((GET_CODE (x) == SYMBOL_REF)
5221 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5222 reloc_diagnostic (x);
5224 return default_assemble_integer (x, size, aligned_p);
5227 static void
5228 spu_asm_globalize_label (FILE * file, const char *name)
5230 fputs ("\t.global\t", file);
5231 assemble_name (file, name);
5232 fputs ("\n", file);
5235 static bool
5236 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5237 int opno ATTRIBUTE_UNUSED, int *total,
5238 bool speed ATTRIBUTE_UNUSED)
5240 int code = GET_CODE (x);
5241 int cost = COSTS_N_INSNS (2);
5243 /* Folding to a CONST_VECTOR will use extra space but there might
5244 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5245 only if it allows us to fold away multiple insns. Changing the cost
5246 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5247 because this cost will only be compared against a single insn.
5248 if (code == CONST_VECTOR)
5249 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5252 /* Use defaults for float operations. Not accurate but good enough. */
5253 if (mode == DFmode)
5255 *total = COSTS_N_INSNS (13);
5256 return true;
5258 if (mode == SFmode)
5260 *total = COSTS_N_INSNS (6);
5261 return true;
5263 switch (code)
5265 case CONST_INT:
5266 if (satisfies_constraint_K (x))
5267 *total = 0;
5268 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5269 *total = COSTS_N_INSNS (1);
5270 else
5271 *total = COSTS_N_INSNS (3);
5272 return true;
5274 case CONST:
5275 *total = COSTS_N_INSNS (3);
5276 return true;
5278 case LABEL_REF:
5279 case SYMBOL_REF:
5280 *total = COSTS_N_INSNS (0);
5281 return true;
5283 case CONST_DOUBLE:
5284 *total = COSTS_N_INSNS (5);
5285 return true;
5287 case FLOAT_EXTEND:
5288 case FLOAT_TRUNCATE:
5289 case FLOAT:
5290 case UNSIGNED_FLOAT:
5291 case FIX:
5292 case UNSIGNED_FIX:
5293 *total = COSTS_N_INSNS (7);
5294 return true;
5296 case PLUS:
5297 if (mode == TImode)
5299 *total = COSTS_N_INSNS (9);
5300 return true;
5302 break;
5304 case MULT:
5305 cost =
5306 GET_CODE (XEXP (x, 0)) ==
5307 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5308 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5310 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5312 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5313 cost = COSTS_N_INSNS (14);
5314 if ((val & 0xffff) == 0)
5315 cost = COSTS_N_INSNS (9);
5316 else if (val > 0 && val < 0x10000)
5317 cost = COSTS_N_INSNS (11);
5320 *total = cost;
5321 return true;
5322 case DIV:
5323 case UDIV:
5324 case MOD:
5325 case UMOD:
5326 *total = COSTS_N_INSNS (20);
5327 return true;
5328 case ROTATE:
5329 case ROTATERT:
5330 case ASHIFT:
5331 case ASHIFTRT:
5332 case LSHIFTRT:
5333 *total = COSTS_N_INSNS (4);
5334 return true;
5335 case UNSPEC:
5336 if (XINT (x, 1) == UNSPEC_CONVERT)
5337 *total = COSTS_N_INSNS (0);
5338 else
5339 *total = COSTS_N_INSNS (4);
5340 return true;
5342 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5343 if (GET_MODE_CLASS (mode) == MODE_INT
5344 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5345 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5346 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5347 *total = cost;
5348 return true;
5351 static machine_mode
5352 spu_unwind_word_mode (void)
5354 return SImode;
5357 /* Decide whether we can make a sibling call to a function. DECL is the
5358 declaration of the function being targeted by the call and EXP is the
5359 CALL_EXPR representing the call. */
5360 static bool
5361 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5363 return decl && !TARGET_LARGE_MEM;
5366 /* We need to correctly update the back chain pointer and the Available
5367 Stack Size (which is in the second slot of the sp register). */
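/* Roughly: OP1 (the requested size) is splatted across a V4SI and
   subtracted from every slot of $sp, which updates the stack pointer
   (slot 0) and the Available Stack Size (slot 1) in one go; the saved
   back chain is then stored at the new stack bottom and OP0 receives the
   address of the allocated block.  */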
5368 void
5369 spu_allocate_stack (rtx op0, rtx op1)
5371 HOST_WIDE_INT v;
5372 rtx chain = gen_reg_rtx (V4SImode);
5373 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5374 rtx sp = gen_reg_rtx (V4SImode);
5375 rtx splatted = gen_reg_rtx (V4SImode);
5376 rtx pat = gen_reg_rtx (TImode);
5378 /* copy the back chain so we can save it back again. */
5379 emit_move_insn (chain, stack_bot);
5381 op1 = force_reg (SImode, op1);
5383 v = 0x1020300010203ll;
5384 emit_move_insn (pat, immed_double_const (v, v, TImode));
5385 emit_insn (gen_shufb (splatted, op1, op1, pat));
5387 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5388 emit_insn (gen_subv4si3 (sp, sp, splatted));
5390 if (flag_stack_check)
5392 rtx avail = gen_reg_rtx(SImode);
5393 rtx result = gen_reg_rtx(SImode);
5394 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5395 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5396 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5399 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5401 emit_move_insn (stack_bot, chain);
5403 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5406 void
5407 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5409 static unsigned char arr[16] =
5410 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5411 rtx temp = gen_reg_rtx (SImode);
5412 rtx temp2 = gen_reg_rtx (SImode);
5413 rtx temp3 = gen_reg_rtx (V4SImode);
5414 rtx temp4 = gen_reg_rtx (V4SImode);
5415 rtx pat = gen_reg_rtx (TImode);
5416 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5418 /* Restore the backchain from the first word, sp from the second. */
5419 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5420 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5422 emit_move_insn (pat, array_to_constant (TImode, arr));
5424 /* Compute Available Stack Size for sp */
5425 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5426 emit_insn (gen_shufb (temp3, temp, temp, pat));
5428 /* Compute Available Stack Size for back chain */
5429 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5430 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5431 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5433 emit_insn (gen_addv4si3 (sp, sp, temp3));
5434 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5437 static void
5438 spu_init_libfuncs (void)
5440 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5441 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5442 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5443 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5444 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5445 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5446 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5447 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5448 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5449 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5450 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5451 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5453 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5454 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5456 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5457 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5458 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5459 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5460 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5461 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5462 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5463 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5464 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5465 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5466 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5467 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5469 set_optab_libfunc (smul_optab, TImode, "__multi3");
5470 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5471 set_optab_libfunc (smod_optab, TImode, "__modti3");
5472 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5473 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5474 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5477 /* Make a subreg, stripping any existing subreg. We could possibly just
5478 call simplify_subreg, but in this case we know what we want. */
5480 spu_gen_subreg (machine_mode mode, rtx x)
5482 if (GET_CODE (x) == SUBREG)
5483 x = SUBREG_REG (x);
5484 if (GET_MODE (x) == mode)
5485 return x;
5486 return gen_rtx_SUBREG (mode, x, 0);
5489 static bool
5490 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5492 return (TYPE_MODE (type) == BLKmode
5493 && ((type) == 0
5494 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5495 || int_size_in_bytes (type) >
5496 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5499 /* Create the built-in types and functions */
5501 enum spu_function_code
5503 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5504 #include "spu-builtins.def"
5505 #undef DEF_BUILTIN
5506 NUM_SPU_BUILTINS
5509 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5511 struct spu_builtin_description spu_builtins[] = {
5512 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5513 {fcode, icode, name, type, params},
5514 #include "spu-builtins.def"
5515 #undef DEF_BUILTIN
5518 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5520 /* Returns the spu builtin decl for CODE. */
5522 static tree
5523 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5525 if (code >= NUM_SPU_BUILTINS)
5526 return error_mark_node;
5528 return spu_builtin_decls[code];
5532 static void
5533 spu_init_builtins (void)
5535 struct spu_builtin_description *d;
5536 unsigned int i;
5538 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5539 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5540 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5541 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5542 V4SF_type_node = build_vector_type (float_type_node, 4);
5543 V2DF_type_node = build_vector_type (double_type_node, 2);
5545 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5546 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5547 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5548 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5550 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5552 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5561 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5562 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5566 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5567 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5568 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5569 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5570 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5571 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5572 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5574 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5575 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5577 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5579 spu_builtin_types[SPU_BTI_PTR] =
5580 build_pointer_type (build_qualified_type
5581 (void_type_node,
5582 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5584 /* For each builtin we build a new prototype. The tree code will make
5585 sure nodes are shared. */
5586 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5588 tree p;
5589 char name[64]; /* build_function will make a copy. */
5590 int parm;
5592 if (d->name == 0)
5593 continue;
5595 /* Find last parm. */
5596 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5599 p = void_list_node;
5600 while (parm > 1)
5601 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5603 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5605 sprintf (name, "__builtin_%s", d->name);
5606 spu_builtin_decls[i] =
5607 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5608 if (d->fcode == SPU_MASK_FOR_LOAD)
5609 TREE_READONLY (spu_builtin_decls[i]) = 1;
5611 /* These builtins don't throw. */
5612 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5616 void
5617 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5619 static unsigned char arr[16] =
5620 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5622 rtx temp = gen_reg_rtx (Pmode);
5623 rtx temp2 = gen_reg_rtx (V4SImode);
5624 rtx temp3 = gen_reg_rtx (V4SImode);
5625 rtx pat = gen_reg_rtx (TImode);
5626 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5628 emit_move_insn (pat, array_to_constant (TImode, arr));
5630 /* Restore the sp. */
5631 emit_move_insn (temp, op1);
5632 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5634 /* Compute available stack size for sp. */
5635 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5636 emit_insn (gen_shufb (temp3, temp, temp, pat));
5638 emit_insn (gen_addv4si3 (sp, sp, temp3));
5639 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5643 spu_safe_dma (HOST_WIDE_INT channel)
5645 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5648 void
5649 spu_builtin_splats (rtx ops[])
5651 machine_mode mode = GET_MODE (ops[0]);
5652 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5654 unsigned char arr[16];
5655 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5656 emit_move_insn (ops[0], array_to_constant (mode, arr));
5658 else
5660 rtx reg = gen_reg_rtx (TImode);
5661 rtx shuf;
5662 if (GET_CODE (ops[1]) != REG
5663 && GET_CODE (ops[1]) != SUBREG)
5664 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5665 switch (mode)
5667 case V2DImode:
5668 case V2DFmode:
5669 shuf =
5670 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5671 TImode);
5672 break;
5673 case V4SImode:
5674 case V4SFmode:
5675 shuf =
5676 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5677 TImode);
5678 break;
5679 case V8HImode:
5680 shuf =
5681 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5682 TImode);
5683 break;
5684 case V16QImode:
5685 shuf =
5686 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5687 TImode);
5688 break;
5689 default:
5690 abort ();
5692 emit_move_insn (reg, shuf);
5693 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5697 void
5698 spu_builtin_extract (rtx ops[])
5700 machine_mode mode;
5701 rtx rot, from, tmp;
5703 mode = GET_MODE (ops[1]);
5705 if (GET_CODE (ops[2]) == CONST_INT)
5707 switch (mode)
5709 case V16QImode:
5710 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5711 break;
5712 case V8HImode:
5713 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5714 break;
5715 case V4SFmode:
5716 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5717 break;
5718 case V4SImode:
5719 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5720 break;
5721 case V2DImode:
5722 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5723 break;
5724 case V2DFmode:
5725 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5726 break;
5727 default:
5728 abort ();
5730 return;
5733 from = spu_gen_subreg (TImode, ops[1]);
5734 rot = gen_reg_rtx (TImode);
5735 tmp = gen_reg_rtx (SImode);
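/* TMP becomes the byte rotate count that brings element ops[2] into the
   preferred slot: 4*n for V4SI/V4SF and 8*n for V2DI/V2DF, while the
   sub-word cases subtract 3 (QI) or 2 (HI, after doubling the index) so
   the element ends up right-justified in the first word.  */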
5737 switch (mode)
5739 case V16QImode:
5740 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5741 break;
5742 case V8HImode:
5743 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5744 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5745 break;
5746 case V4SFmode:
5747 case V4SImode:
5748 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5749 break;
5750 case V2DImode:
5751 case V2DFmode:
5752 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5753 break;
5754 default:
5755 abort ();
5757 emit_insn (gen_rotqby_ti (rot, from, tmp));
5759 emit_insn (gen_spu_convert (ops[0], rot));
5762 void
5763 spu_builtin_insert (rtx ops[])
5765 machine_mode mode = GET_MODE (ops[0]);
5766 machine_mode imode = GET_MODE_INNER (mode);
5767 rtx mask = gen_reg_rtx (TImode);
5768 rtx offset;
5770 if (GET_CODE (ops[3]) == CONST_INT)
5771 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5772 else
5774 offset = gen_reg_rtx (SImode);
5775 emit_insn (gen_mulsi3
5776 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5778 emit_insn (gen_cpat
5779 (mask, stack_pointer_rtx, offset,
5780 GEN_INT (GET_MODE_SIZE (imode))));
5781 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5784 void
5785 spu_builtin_promote (rtx ops[])
5787 machine_mode mode, imode;
5788 rtx rot, from, offset;
5789 HOST_WIDE_INT pos;
5791 mode = GET_MODE (ops[0]);
5792 imode = GET_MODE_INNER (mode);
5794 from = gen_reg_rtx (TImode);
5795 rot = spu_gen_subreg (TImode, ops[0]);
5797 emit_insn (gen_spu_convert (from, ops[1]));
5799 if (GET_CODE (ops[2]) == CONST_INT)
5801 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5802 if (GET_MODE_SIZE (imode) < 4)
5803 pos += 4 - GET_MODE_SIZE (imode);
5804 offset = GEN_INT (pos & 15);
5806 else
5808 offset = gen_reg_rtx (SImode);
5809 switch (mode)
5811 case V16QImode:
5812 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5813 break;
5814 case V8HImode:
5815 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5816 emit_insn (gen_addsi3 (offset, offset, offset));
5817 break;
5818 case V4SFmode:
5819 case V4SImode:
5820 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5821 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5822 break;
5823 case V2DImode:
5824 case V2DFmode:
5825 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5826 break;
5827 default:
5828 abort ();
5831 emit_insn (gen_rotqby_ti (rot, from, offset));
5834 static void
5835 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5837 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5838 rtx shuf = gen_reg_rtx (V4SImode);
5839 rtx insn = gen_reg_rtx (V4SImode);
5840 rtx shufc;
5841 rtx insnc;
5842 rtx mem;
5844 fnaddr = force_reg (SImode, fnaddr);
5845 cxt = force_reg (SImode, cxt);
5847 if (TARGET_LARGE_MEM)
5849 rtx rotl = gen_reg_rtx (V4SImode);
5850 rtx mask = gen_reg_rtx (V4SImode);
5851 rtx bi = gen_reg_rtx (SImode);
5852 static unsigned char const shufa[16] = {
5853 2, 3, 0, 1, 18, 19, 16, 17,
5854 0, 1, 2, 3, 16, 17, 18, 19
5856 static unsigned char const insna[16] = {
5857 0x41, 0, 0, 79,
5858 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5859 0x60, 0x80, 0, 79,
5860 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5863 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5864 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5866 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5867 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5868 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5869 emit_insn (gen_selb (insn, insnc, rotl, mask));
5871 mem = adjust_address (m_tramp, V4SImode, 0);
5872 emit_move_insn (mem, insn);
5874 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5875 mem = adjust_address (m_tramp, Pmode, 16);
5876 emit_move_insn (mem, bi);
5878 else
5880 rtx scxt = gen_reg_rtx (SImode);
5881 rtx sfnaddr = gen_reg_rtx (SImode);
5882 static unsigned char const insna[16] = {
5883 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5884 0x30, 0, 0, 0,
5885 0, 0, 0, 0,
5886 0, 0, 0, 0
5889 shufc = gen_reg_rtx (TImode);
5890 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5892 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5893 fits 18 bits and the last 4 are zeros. This will be true if
5894 the stack pointer is initialized to 0x3fff0 at program start;
5895 otherwise the ila instruction will be garbage. */
5897 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5898 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5899 emit_insn (gen_cpat
5900 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5901 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5902 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5904 mem = adjust_address (m_tramp, V4SImode, 0);
5905 emit_move_insn (mem, insn);
5907 emit_insn (gen_sync ());
5910 static bool
5911 spu_warn_func_return (tree decl)
5913 /* Naked functions are implemented entirely in assembly, including the
5914 return sequence, so suppress warnings about this. */
5915 return !spu_naked_function_p (decl);
5918 void
5919 spu_expand_sign_extend (rtx ops[])
5921 unsigned char arr[16];
5922 rtx pat = gen_reg_rtx (TImode);
5923 rtx sign, c;
5924 int i, last;
5925 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
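/* ARR is the shufb control: selector bytes 0x00-0x0f copy bytes of ops[1]
   and 0x10-0x1f copy bytes of the SIGN word, so the fill value replicates
   the sign byte across the high end of the result while the trailing
   entries supply the original value's bytes.  */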
5926 if (GET_MODE (ops[1]) == QImode)
5928 sign = gen_reg_rtx (HImode);
5929 emit_insn (gen_extendqihi2 (sign, ops[1]));
5930 for (i = 0; i < 16; i++)
5931 arr[i] = 0x12;
5932 arr[last] = 0x13;
5934 else
5936 for (i = 0; i < 16; i++)
5937 arr[i] = 0x10;
5938 switch (GET_MODE (ops[1]))
5940 case HImode:
5941 sign = gen_reg_rtx (SImode);
5942 emit_insn (gen_extendhisi2 (sign, ops[1]));
5943 arr[last] = 0x03;
5944 arr[last - 1] = 0x02;
5945 break;
5946 case SImode:
5947 sign = gen_reg_rtx (SImode);
5948 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5949 for (i = 0; i < 4; i++)
5950 arr[last - i] = 3 - i;
5951 break;
5952 case DImode:
5953 sign = gen_reg_rtx (SImode);
5954 c = gen_reg_rtx (SImode);
5955 emit_insn (gen_spu_convert (c, ops[1]));
5956 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5957 for (i = 0; i < 8; i++)
5958 arr[last - i] = 7 - i;
5959 break;
5960 default:
5961 abort ();
5964 emit_move_insn (pat, array_to_constant (TImode, arr));
5965 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5968 /* Expand vector initialization. If there are any constant parts,
5969 load constant parts first. Then load any non-constant parts. */
5970 void
5971 spu_expand_vector_init (rtx target, rtx vals)
5973 machine_mode mode = GET_MODE (target);
5974 int n_elts = GET_MODE_NUNITS (mode);
5975 int n_var = 0;
5976 bool all_same = true;
5977 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5978 int i;
5980 first = XVECEXP (vals, 0, 0);
5981 for (i = 0; i < n_elts; ++i)
5983 x = XVECEXP (vals, 0, i);
5984 if (!(CONST_INT_P (x)
5985 || GET_CODE (x) == CONST_DOUBLE
5986 || GET_CODE (x) == CONST_FIXED))
5987 ++n_var;
5988 else
5990 if (first_constant == NULL_RTX)
5991 first_constant = x;
5993 if (i > 0 && !rtx_equal_p (x, first))
5994 all_same = false;
5997 /* if all elements are the same, use splats to repeat elements */
5998 if (all_same)
6000 if (!CONSTANT_P (first)
6001 && !register_operand (first, GET_MODE (x)))
6002 first = force_reg (GET_MODE (first), first);
6003 emit_insn (gen_spu_splats (target, first));
6004 return;
6007 /* load constant parts */
6008 if (n_var != n_elts)
6010 if (n_var == 0)
6012 emit_move_insn (target,
6013 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6015 else
6017 rtx constant_parts_rtx = copy_rtx (vals);
6019 gcc_assert (first_constant != NULL_RTX);
6020 /* Fill empty slots with the first constant; this increases
6021 our chance of using splats in the recursive call below. */
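/* For example, initializing { 1, 2, x, 4 } first loads the constant
   vector { 1, 2, 1, 4 } (slot 2 borrows the first constant) and then
   inserts x into slot 2 below.  */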
6022 for (i = 0; i < n_elts; ++i)
6024 x = XVECEXP (constant_parts_rtx, 0, i);
6025 if (!(CONST_INT_P (x)
6026 || GET_CODE (x) == CONST_DOUBLE
6027 || GET_CODE (x) == CONST_FIXED))
6028 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6031 spu_expand_vector_init (target, constant_parts_rtx);
6035 /* load variable parts */
6036 if (n_var != 0)
6038 rtx insert_operands[4];
6040 insert_operands[0] = target;
6041 insert_operands[2] = target;
6042 for (i = 0; i < n_elts; ++i)
6044 x = XVECEXP (vals, 0, i);
6045 if (!(CONST_INT_P (x)
6046 || GET_CODE (x) == CONST_DOUBLE
6047 || GET_CODE (x) == CONST_FIXED))
6049 if (!register_operand (x, GET_MODE (x)))
6050 x = force_reg (GET_MODE (x), x);
6051 insert_operands[1] = x;
6052 insert_operands[3] = GEN_INT (i);
6053 spu_builtin_insert (insert_operands);
6059 /* Return insn index for the vector compare instruction for given CODE,
6060 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6062 static int
6063 get_vec_cmp_insn (enum rtx_code code,
6064 machine_mode dest_mode,
6065 machine_mode op_mode)
6068 switch (code)
6070 case EQ:
6071 if (dest_mode == V16QImode && op_mode == V16QImode)
6072 return CODE_FOR_ceq_v16qi;
6073 if (dest_mode == V8HImode && op_mode == V8HImode)
6074 return CODE_FOR_ceq_v8hi;
6075 if (dest_mode == V4SImode && op_mode == V4SImode)
6076 return CODE_FOR_ceq_v4si;
6077 if (dest_mode == V4SImode && op_mode == V4SFmode)
6078 return CODE_FOR_ceq_v4sf;
6079 if (dest_mode == V2DImode && op_mode == V2DFmode)
6080 return CODE_FOR_ceq_v2df;
6081 break;
6082 case GT:
6083 if (dest_mode == V16QImode && op_mode == V16QImode)
6084 return CODE_FOR_cgt_v16qi;
6085 if (dest_mode == V8HImode && op_mode == V8HImode)
6086 return CODE_FOR_cgt_v8hi;
6087 if (dest_mode == V4SImode && op_mode == V4SImode)
6088 return CODE_FOR_cgt_v4si;
6089 if (dest_mode == V4SImode && op_mode == V4SFmode)
6090 return CODE_FOR_cgt_v4sf;
6091 if (dest_mode == V2DImode && op_mode == V2DFmode)
6092 return CODE_FOR_cgt_v2df;
6093 break;
6094 case GTU:
6095 if (dest_mode == V16QImode && op_mode == V16QImode)
6096 return CODE_FOR_clgt_v16qi;
6097 if (dest_mode == V8HImode && op_mode == V8HImode)
6098 return CODE_FOR_clgt_v8hi;
6099 if (dest_mode == V4SImode && op_mode == V4SImode)
6100 return CODE_FOR_clgt_v4si;
6101 break;
6102 default:
6103 break;
6105 return -1;
6108 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6109 DMODE is expected destination mode. This is a recursive function. */
6111 static rtx
6112 spu_emit_vector_compare (enum rtx_code rcode,
6113 rtx op0, rtx op1,
6114 machine_mode dmode)
6116 int vec_cmp_insn;
6117 rtx mask;
6118 machine_mode dest_mode;
6119 machine_mode op_mode = GET_MODE (op1);
6121 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6123 /* Floating-point vector compare instructions use destination mode V4SImode.
6124 Double-precision floating-point vector compare instructions use destination
6125 mode V2DImode. Move the destination to the appropriate mode later. */
6126 if (dmode == V4SFmode)
6127 dest_mode = V4SImode;
6128 else if (dmode == V2DFmode)
6129 dest_mode = V2DImode;
6130 else
6131 dest_mode = dmode;
6133 mask = gen_reg_rtx (dest_mode);
6134 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6136 if (vec_cmp_insn == -1)
6138 bool swap_operands = false;
6139 bool try_again = false;
6140 switch (rcode)
6142 case LT:
6143 rcode = GT;
6144 swap_operands = true;
6145 try_again = true;
6146 break;
6147 case LTU:
6148 rcode = GTU;
6149 swap_operands = true;
6150 try_again = true;
6151 break;
6152 case NE:
6153 case UNEQ:
6154 case UNLE:
6155 case UNLT:
6156 case UNGE:
6157 case UNGT:
6158 case UNORDERED:
6159 /* Treat A != B as ~(A==B). */
6161 enum rtx_code rev_code;
6162 enum insn_code nor_code;
6163 rtx rev_mask;
6165 rev_code = reverse_condition_maybe_unordered (rcode);
6166 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6168 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6169 gcc_assert (nor_code != CODE_FOR_nothing);
6170 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6171 if (dmode != dest_mode)
6173 rtx temp = gen_reg_rtx (dest_mode);
6174 convert_move (temp, mask, 0);
6175 return temp;
6177 return mask;
6179 break;
6180 case GE:
6181 case GEU:
6182 case LE:
6183 case LEU:
6184 /* Try GT/GTU/LT/LTU OR EQ */
6186 rtx c_rtx, eq_rtx;
6187 enum insn_code ior_code;
6188 enum rtx_code new_code;
6190 switch (rcode)
6192 case GE: new_code = GT; break;
6193 case GEU: new_code = GTU; break;
6194 case LE: new_code = LT; break;
6195 case LEU: new_code = LTU; break;
6196 default:
6197 gcc_unreachable ();
6200 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6201 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6203 ior_code = optab_handler (ior_optab, dest_mode);
6204 gcc_assert (ior_code != CODE_FOR_nothing);
6205 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6206 if (dmode != dest_mode)
6208 rtx temp = gen_reg_rtx (dest_mode);
6209 convert_move (temp, mask, 0);
6210 return temp;
6212 return mask;
6214 break;
6215 case LTGT:
6216 /* Try LT OR GT */
6218 rtx lt_rtx, gt_rtx;
6219 enum insn_code ior_code;
6221 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6222 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6224 ior_code = optab_handler (ior_optab, dest_mode);
6225 gcc_assert (ior_code != CODE_FOR_nothing);
6226 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6227 if (dmode != dest_mode)
6229 rtx temp = gen_reg_rtx (dest_mode);
6230 convert_move (temp, mask, 0);
6231 return temp;
6233 return mask;
6235 break;
6236 case ORDERED:
6237 /* Implement as (A==A) & (B==B) */
6239 rtx a_rtx, b_rtx;
6240 enum insn_code and_code;
6242 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6243 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6245 and_code = optab_handler (and_optab, dest_mode);
6246 gcc_assert (and_code != CODE_FOR_nothing);
6247 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6248 if (dmode != dest_mode)
6250 rtx temp = gen_reg_rtx (dest_mode);
6251 convert_move (temp, mask, 0);
6252 return temp;
6254 return mask;
6256 break;
6257 default:
6258 gcc_unreachable ();
6261 /* You only get two chances. */
6262 if (try_again)
6263 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6265 gcc_assert (vec_cmp_insn != -1);
6267 if (swap_operands)
6269 rtx tmp;
6270 tmp = op0;
6271 op0 = op1;
6272 op1 = tmp;
6276 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6277 if (dmode != dest_mode)
6279 rtx temp = gen_reg_rtx (dest_mode);
6280 convert_move (temp, mask, 0);
6281 return temp;
6283 return mask;
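/* The fallbacks above rebuild missing compares from the ones that do
   exist: NE is ~(EQ), GE/GEU/LE/LEU are GT|EQ (with swapped operands
   where needed), LTGT is LT|GT, and ORDERED is (A==A)&(B==B).  Below
   is a standalone, guarded-out sketch (not backend code) that checks
   those identities on plain doubles with a NaN operand; the unsigned
   masks are scalar stand-ins for the per-lane vector masks.  */
#if 0
#include <math.h>
#include <stdio.h>

static unsigned mask_eq (double a, double b) { return a == b ? ~0u : 0u; }
static unsigned mask_gt (double a, double b) { return a > b ? ~0u : 0u; }

int
main (void)
{
  double a = 1.0, b = NAN;

  unsigned ne = ~mask_eq (a, b);                        /* NE: true  */
  unsigned ge = mask_gt (a, b) | mask_eq (a, b);        /* GE: false */
  unsigned ltgt = mask_gt (b, a) | mask_gt (a, b);      /* LTGT: false */
  unsigned ordered = mask_eq (a, a) & mask_eq (b, b);   /* ORDERED: false */

  printf ("ne=%x ge=%x ltgt=%x ordered=%x\n", ne, ge, ltgt, ordered);
  return 0;
}
#endif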
6287 /* Emit vector conditional expression.
6288 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6289 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6291 int
6292 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6293 rtx cond, rtx cc_op0, rtx cc_op1)
6295 machine_mode dest_mode = GET_MODE (dest);
6296 enum rtx_code rcode = GET_CODE (cond);
6297 rtx mask;
6299 /* Get the vector mask for the given relational operations. */
6300 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6302 emit_insn(gen_selb (dest, op2, op1, mask));
6304 return 1;
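/* gen_selb above performs a bitwise select: as used here, lanes where
   the comparison mask is all-ones take OP1 and the remaining lanes
   take OP2, i.e. dest = (op2 & ~mask) | (op1 & mask).  A standalone,
   guarded-out sketch of that select on a single 32-bit lane follows;
   selb32 is an illustrative helper, not the SPU intrinsic.  */
#if 0
#include <stdio.h>

static unsigned
selb32 (unsigned op2, unsigned op1, unsigned mask)
{
  return (op2 & ~mask) | (op1 & mask);
}

int
main (void)
{
  /* An all-ones mask (comparison true) selects OP1; zero selects OP2.  */
  printf ("%#x\n", selb32 (0x11111111u, 0x22222222u, 0xffffffffu)); /* 0x22222222 */
  printf ("%#x\n", selb32 (0x11111111u, 0x22222222u, 0x00000000u)); /* 0x11111111 */
  return 0;
}
#endif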
6307 static rtx
6308 spu_force_reg (machine_mode mode, rtx op)
6310 rtx x, r;
6311 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6313 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6314 || GET_MODE (op) == BLKmode)
6315 return force_reg (mode, convert_to_mode (mode, op, 0));
6316 abort ();
6319 r = force_reg (GET_MODE (op), op);
6320 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6322 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6323 if (x)
6324 return x;
6327 x = gen_reg_rtx (mode);
6328 emit_insn (gen_spu_convert (x, r));
6329 return x;
6332 static void
6333 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6335 HOST_WIDE_INT v = 0;
6336 int lsbits;
6337 /* Check the range of immediate operands. */
6338 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6340 int range = p - SPU_BTI_7;
6342 if (!CONSTANT_P (op))
6343 error ("%s expects an integer literal in the range [%d, %d]",
6344 d->name,
6345 spu_builtin_range[range].low, spu_builtin_range[range].high);
6347 if (GET_CODE (op) == CONST
6348 && (GET_CODE (XEXP (op, 0)) == PLUS
6349 || GET_CODE (XEXP (op, 0)) == MINUS))
6351 v = INTVAL (XEXP (XEXP (op, 0), 1));
6352 op = XEXP (XEXP (op, 0), 0);
6354 else if (GET_CODE (op) == CONST_INT)
6355 v = INTVAL (op);
6356 else if (GET_CODE (op) == CONST_VECTOR
6357 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6358 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6360 /* The default for v is 0, which is valid in every range. */
6361 if (v < spu_builtin_range[range].low
6362 || v > spu_builtin_range[range].high)
6363 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6364 d->name,
6365 spu_builtin_range[range].low, spu_builtin_range[range].high,
6366 v);
6368 switch (p)
6370 case SPU_BTI_S10_4:
6371 lsbits = 4;
6372 break;
6373 case SPU_BTI_U16_2:
6374 /* This is only used in lqa and stqa. Even though the insns
6375 encode 16 bits of the address (all but the 2 least
6376 significant), only 14 bits are used because the address is
6377 masked to be 16-byte aligned. */
6378 lsbits = 4;
6379 break;
6380 case SPU_BTI_S16_2:
6381 /* This is used for lqr and stqr. */
6382 lsbits = 2;
6383 break;
6384 default:
6385 lsbits = 0;
6388 if (GET_CODE (op) == LABEL_REF
6389 || (GET_CODE (op) == SYMBOL_REF
6390 && SYMBOL_REF_FUNCTION_P (op))
6391 || (v & ((1 << lsbits) - 1)) != 0)
6392 warning (0, "%d least significant bits of %s are ignored", lsbits,
6393 d->name);
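/* A standalone, guarded-out sketch of the two checks above: an
   immediate must lie in the builtin's range, and low bits that the
   instruction ignores should be zero.  The range and lsbits values in
   main are hypothetical (not taken from spu_builtin_range), and plain
   printf stands in for the error/warning diagnostics.  */
#if 0
#include <stdio.h>

static void
check_imm (long v, long low, long high, int lsbits, const char *name)
{
  if (v < low || v > high)
    printf ("%s expects an integer literal in the range [%ld, %ld]. (%ld)\n",
            name, low, high, v);
  if (lsbits && (v & ((1L << lsbits) - 1)) != 0)
    printf ("%d least significant bits of %s are ignored\n", lsbits, name);
}

int
main (void)
{
  check_imm (18, -512, 511, 4, "si_lqa");   /* 18 is not 16-byte aligned.  */
  check_imm (4096, -512, 511, 0, "si_ai");  /* outside the hypothetical range.  */
  return 0;
}
#endif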
6398 static int
6399 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6400 rtx target, rtx ops[])
6402 enum insn_code icode = (enum insn_code) d->icode;
6403 int i = 0, a;
6405 /* Expand the arguments into rtl. */
6407 if (d->parm[0] != SPU_BTI_VOID)
6408 ops[i++] = target;
6410 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6412 tree arg = CALL_EXPR_ARG (exp, a);
6413 if (arg == 0)
6414 abort ();
6415 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6418 gcc_assert (i == insn_data[icode].n_generator_args);
6419 return i;
6422 static rtx
6423 spu_expand_builtin_1 (struct spu_builtin_description *d,
6424 tree exp, rtx target)
6426 rtx pat;
6427 rtx ops[8];
6428 enum insn_code icode = (enum insn_code) d->icode;
6429 machine_mode mode, tmode;
6430 int i, p;
6431 int n_operands;
6432 tree return_type;
6434 /* Set up ops[] with values from arglist. */
6435 n_operands = expand_builtin_args (d, exp, target, ops);
6437 /* Handle the target operand which must be operand 0. */
6438 i = 0;
6439 if (d->parm[0] != SPU_BTI_VOID)
6442 /* We prefer the mode specified for the match_operand; otherwise
6443 use the mode from the builtin function prototype. */
6444 tmode = insn_data[d->icode].operand[0].mode;
6445 if (tmode == VOIDmode)
6446 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6448 /* Try to use TARGET, because not using it can lead to extra copies,
6449 and when all of the registers are in use, extra copies lead
6450 to extra spills. */
6451 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6452 ops[0] = target;
6453 else
6454 target = ops[0] = gen_reg_rtx (tmode);
6456 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6457 abort ();
6459 i++;
6462 if (d->fcode == SPU_MASK_FOR_LOAD)
6464 machine_mode mode = insn_data[icode].operand[1].mode;
6465 tree arg;
6466 rtx addr, op, pat;
6468 /* get addr */
6469 arg = CALL_EXPR_ARG (exp, 0);
6470 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6471 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6472 addr = memory_address (mode, op);
6474 /* negate addr */
6475 op = gen_reg_rtx (GET_MODE (addr));
6476 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6477 op = gen_rtx_MEM (mode, op);
6479 pat = GEN_FCN (icode) (target, op);
6480 if (!pat)
6481 return 0;
6482 emit_insn (pat);
6483 return target;
6486 /* Ignore align_hint, but still expand its args in case they have
6487 side effects. */
6488 if (icode == CODE_FOR_spu_align_hint)
6489 return 0;
6491 /* Handle the rest of the operands. */
6492 for (p = 1; i < n_operands; i++, p++)
6494 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6495 mode = insn_data[d->icode].operand[i].mode;
6496 else
6497 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6499 /* mode can be VOIDmode here for labels */
6501 /* For specific intrinsics with an immediate operand, e.g.,
6502 si_ai(), we sometimes need to convert the scalar argument to a
6503 vector argument by splatting the scalar. */
6504 if (VECTOR_MODE_P (mode)
6505 && (GET_CODE (ops[i]) == CONST_INT
6506 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6507 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6509 if (GET_CODE (ops[i]) == CONST_INT)
6510 ops[i] = spu_const (mode, INTVAL (ops[i]));
6511 else
6513 rtx reg = gen_reg_rtx (mode);
6514 machine_mode imode = GET_MODE_INNER (mode);
6515 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6516 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6517 if (imode != GET_MODE (ops[i]))
6518 ops[i] = convert_to_mode (imode, ops[i],
6519 TYPE_UNSIGNED (spu_builtin_types
6520 [d->parm[i]]));
6521 emit_insn (gen_spu_splats (reg, ops[i]));
6522 ops[i] = reg;
6526 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6528 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6529 ops[i] = spu_force_reg (mode, ops[i]);
6532 switch (n_operands)
6534 case 0:
6535 pat = GEN_FCN (icode) (0);
6536 break;
6537 case 1:
6538 pat = GEN_FCN (icode) (ops[0]);
6539 break;
6540 case 2:
6541 pat = GEN_FCN (icode) (ops[0], ops[1]);
6542 break;
6543 case 3:
6544 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6545 break;
6546 case 4:
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6548 break;
6549 case 5:
6550 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6551 break;
6552 case 6:
6553 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6554 break;
6555 default:
6556 abort ();
6559 if (!pat)
6560 abort ();
6562 if (d->type == B_CALL || d->type == B_BISLED)
6563 emit_call_insn (pat);
6564 else if (d->type == B_JUMP)
6566 emit_jump_insn (pat);
6567 emit_barrier ();
6569 else
6570 emit_insn (pat);
6572 return_type = spu_builtin_types[d->parm[0]];
6573 if (d->parm[0] != SPU_BTI_VOID
6574 && GET_MODE (target) != TYPE_MODE (return_type))
6576 /* TARGET is the return value. It should always have the mode of
6577 the builtin function prototype. */
6578 target = spu_force_reg (TYPE_MODE (return_type), target);
6581 return target;
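/* A standalone, guarded-out sketch of the scalar-to-vector conversion
   described in the si_ai() comment inside spu_expand_builtin_1 above:
   a scalar argument passed where a vector operand is expected is
   splatted across every lane before the pattern is emitted.  The
   4-lane int layout and the splat4 helper are illustrative assumptions.  */
#if 0
#include <stdio.h>

static void
splat4 (int out[4], int scalar)
{
  for (int i = 0; i < 4; i++)
    out[i] = scalar;
}

int
main (void)
{
  int v[4];
  splat4 (v, 10);   /* e.g. the immediate operand of an si_ai call */
  printf ("%d %d %d %d\n", v[0], v[1], v[2], v[3]);
  return 0;
}
#endif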
6584 rtx
6585 spu_expand_builtin (tree exp,
6586 rtx target,
6587 rtx subtarget ATTRIBUTE_UNUSED,
6588 machine_mode mode ATTRIBUTE_UNUSED,
6589 int ignore ATTRIBUTE_UNUSED)
6591 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6592 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6593 struct spu_builtin_description *d;
6595 if (fcode < NUM_SPU_BUILTINS)
6597 d = &spu_builtins[fcode];
6599 return spu_expand_builtin_1 (d, exp, target);
6601 abort ();
6604 /* Implement targetm.vectorize.builtin_mask_for_load. */
6605 static tree
6606 spu_builtin_mask_for_load (void)
6608 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6611 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6612 static int
6613 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6614 tree vectype,
6615 int misalign ATTRIBUTE_UNUSED)
6617 unsigned elements;
6619 switch (type_of_cost)
6621 case scalar_stmt:
6622 case vector_stmt:
6623 case vector_load:
6624 case vector_store:
6625 case vec_to_scalar:
6626 case scalar_to_vec:
6627 case cond_branch_not_taken:
6628 case vec_perm:
6629 case vec_promote_demote:
6630 return 1;
6632 case scalar_store:
6633 return 10;
6635 case scalar_load:
6636 /* Load + rotate. */
6637 return 2;
6639 case unaligned_load:
6640 return 2;
6642 case cond_branch_taken:
6643 return 6;
6645 case vec_construct:
6646 elements = TYPE_VECTOR_SUBPARTS (vectype);
6647 return elements / 2 + 1;
6649 default:
6650 gcc_unreachable ();
6654 /* Implement targetm.vectorize.init_cost. */
6656 static void *
6657 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6659 unsigned *cost = XNEWVEC (unsigned, 3);
6660 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6661 return cost;
6664 /* Implement targetm.vectorize.add_stmt_cost. */
6666 static unsigned
6667 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6668 struct _stmt_vec_info *stmt_info, int misalign,
6669 enum vect_cost_model_location where)
6671 unsigned *cost = (unsigned *) data;
6672 unsigned retval = 0;
6674 if (flag_vect_cost_model)
6676 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6677 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6679 /* Statements in an inner loop relative to the loop being
6680 vectorized are weighted more heavily. The value here is
6681 arbitrary and could potentially be improved with analysis. */
6682 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6683 count *= 50; /* FIXME. */
6685 retval = (unsigned) (count * stmt_cost);
6686 cost[where] += retval;
6689 return retval;
6692 /* Implement targetm.vectorize.finish_cost. */
6694 static void
6695 spu_finish_cost (void *data, unsigned *prologue_cost,
6696 unsigned *body_cost, unsigned *epilogue_cost)
6698 unsigned *cost = (unsigned *) data;
6699 *prologue_cost = cost[vect_prologue];
6700 *body_cost = cost[vect_body];
6701 *epilogue_cost = cost[vect_epilogue];
6704 /* Implement targetm.vectorize.destroy_cost_data. */
6706 static void
6707 spu_destroy_cost_data (void *data)
6709 free (data);
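/* A standalone, guarded-out sketch of the cost-model plumbing
   implemented by spu_init_cost / spu_add_stmt_cost / spu_finish_cost
   above: three counters (prologue, body, epilogue) that per-statement
   costs are accumulated into and read back at the end.  The statement
   costs used in main (1, 10, 6) are the values returned by
   spu_builtin_vectorization_cost above.  */
#if 0
#include <stdio.h>
#include <stdlib.h>

enum where { PROLOGUE, BODY, EPILOGUE };

static unsigned *
init_cost (void)
{
  return calloc (3, sizeof (unsigned));
}

static unsigned
add_stmt_cost (unsigned *cost, int count, int stmt_cost, enum where w)
{
  unsigned retval = (unsigned) (count * stmt_cost);
  cost[w] += retval;
  return retval;
}

int
main (void)
{
  unsigned *cost = init_cost ();
  add_stmt_cost (cost, 4, 1, BODY);      /* four vector_stmt, cost 1 each */
  add_stmt_cost (cost, 1, 10, BODY);     /* one scalar_store, cost 10 */
  add_stmt_cost (cost, 1, 6, PROLOGUE);  /* one cond_branch_taken, cost 6 */
  printf ("prologue=%u body=%u epilogue=%u\n",
          cost[PROLOGUE], cost[BODY], cost[EPILOGUE]);
  free (cost);
  return 0;
}
#endif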
6712 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6713 after applying N iterations. This routine does not determine
6714 how many iterations are required to reach the desired alignment. */
6716 static bool
6717 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6719 if (is_packed)
6720 return false;
6722 /* All other types are naturally aligned. */
6723 return true;
6726 /* Return the appropriate mode for a named address space pointer. */
6727 static machine_mode
6728 spu_addr_space_pointer_mode (addr_space_t addrspace)
6730 switch (addrspace)
6732 case ADDR_SPACE_GENERIC:
6733 return ptr_mode;
6734 case ADDR_SPACE_EA:
6735 return EAmode;
6736 default:
6737 gcc_unreachable ();
6741 /* Return the appropriate mode for a named address space address. */
6742 static machine_mode
6743 spu_addr_space_address_mode (addr_space_t addrspace)
6745 switch (addrspace)
6747 case ADDR_SPACE_GENERIC:
6748 return Pmode;
6749 case ADDR_SPACE_EA:
6750 return EAmode;
6751 default:
6752 gcc_unreachable ();
6756 /* Determine if one named address space is a subset of another. */
6758 static bool
6759 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6761 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6762 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6764 if (subset == superset)
6765 return true;
6767 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6768 being subsets but instead as disjoint address spaces. */
6769 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6770 return false;
6772 else
6773 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6776 /* Convert from one address space to another. */
6777 static rtx
6778 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6780 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6781 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6783 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6784 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6786 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6788 rtx result, ls;
6790 ls = gen_const_mem (DImode,
6791 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6792 set_mem_align (ls, 128);
6794 result = gen_reg_rtx (Pmode);
6795 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6796 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6797 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6798 ls, const0_rtx, Pmode, 1);
6800 emit_insn (gen_subsi3 (result, op, ls));
6802 return result;
6805 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6807 rtx result, ls;
6809 ls = gen_const_mem (DImode,
6810 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6811 set_mem_align (ls, 128);
6813 result = gen_reg_rtx (EAmode);
6814 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6815 op = force_reg (Pmode, op);
6816 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6817 ls, const0_rtx, EAmode, 1);
6818 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6820 if (EAmode == SImode)
6821 emit_insn (gen_addsi3 (result, op, ls));
6822 else
6823 emit_insn (gen_adddi3 (result, op, ls));
6825 return result;
6828 else
6829 gcc_unreachable ();
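/* A standalone, guarded-out sketch of the NULL-preserving conversions
   emitted above: a nonzero generic (local-store) pointer becomes an
   __ea pointer by adding the local-store base, the reverse direction
   subtracts it, and NULL converts to NULL either way.  The 64-bit EA
   width and the ea_local_store value are illustrative assumptions
   standing in for EAmode and the __ea_local_store symbol.  */
#if 0
#include <stdint.h>
#include <stdio.h>

static const uint64_t ea_local_store = 0x100000000ull;  /* hypothetical base */

static uint64_t
generic_to_ea (uint32_t p)
{
  return p ? (uint64_t) p + ea_local_store : 0;
}

static uint32_t
ea_to_generic (uint64_t ea)
{
  return ea ? (uint32_t) (ea - ea_local_store) : 0;
}

int
main (void)
{
  uint32_t lsa = 0x4000;
  printf ("%llx\n", (unsigned long long) generic_to_ea (lsa)); /* 100004000 */
  printf ("%x\n", ea_to_generic (generic_to_ea (lsa)));        /* 4000 */
  printf ("%x\n", ea_to_generic (0));                          /* 0 */
  return 0;
}
#endif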
6833 /* Count the total number of instructions in each pipe and return the
6834 maximum, which is used as the Minimum Iteration Interval (MII)
6835 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6836 -2 marks instructions that can go in either pipe0 or pipe1. */
6837 static int
6838 spu_sms_res_mii (struct ddg *g)
6840 int i;
6841 unsigned t[4] = {0, 0, 0, 0};
6843 for (i = 0; i < g->num_nodes; i++)
6845 rtx_insn *insn = g->nodes[i].insn;
6846 int p = get_pipe (insn) + 2;
6848 gcc_assert (p >= 0);
6849 gcc_assert (p < 4);
6851 t[p]++;
6852 if (dump_file && INSN_P (insn))
6853 fprintf (dump_file, "i%d %s %d %d\n",
6854 INSN_UID (insn),
6855 insn_data[INSN_CODE(insn)].name,
6856 p, t[p]);
6858 if (dump_file)
6859 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6861 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
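/* A standalone, guarded-out worked example of the MII formula above:
   with t[0] counting either-pipe instructions, t[2] pipe0 and t[3]
   pipe1, the result is max(ceil((t[0]+t[2]+t[3])/2), t[2], t[3]).
   The counts in t[] are hypothetical.  */
#if 0
#include <stdio.h>

int
main (void)
{
  unsigned t[4] = { 3, 0, 4, 2 };   /* either-pipe, other, pipe0, pipe1 */
  unsigned mii = (t[0] + t[2] + t[3] + 1) / 2;
  if (t[2] > mii)
    mii = t[2];
  if (t[3] > mii)
    mii = t[3];
  printf ("MII = %u\n", mii);       /* (3+4+2+1)/2 = 5 */
  return 0;
}
#endif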
6865 void
6866 spu_init_expanders (void)
6868 if (cfun)
6870 rtx r0, r1;
6871 /* The hard frame pointer register is only 128-bit aligned when
6872 frame_pointer_needed is true. We don't know that until we're
6873 expanding the prologue. */
6874 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6876 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6877 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6878 to be treated as aligned, so generate them here. */
6879 r0 = gen_reg_rtx (SImode);
6880 r1 = gen_reg_rtx (SImode);
6881 mark_reg_pointer (r0, 128);
6882 mark_reg_pointer (r1, 128);
6883 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6884 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6888 static machine_mode
6889 spu_libgcc_cmp_return_mode (void)
6892 /* For SPU, word_mode is TImode, so it is better to use SImode
6893 for compare returns. */
6894 return SImode;
6897 static machine_mode
6898 spu_libgcc_shift_count_mode (void)
6900 /* For SPU, word_mode is TImode, so it is better to use SImode
6901 for shift counts. */
6902 return SImode;
6905 /* Implement targetm.section_type_flags. */
6906 static unsigned int
6907 spu_section_type_flags (tree decl, const char *name, int reloc)
6909 /* .toe needs to have type @nobits. */
6910 if (strcmp (name, ".toe") == 0)
6911 return SECTION_BSS;
6912 /* Don't load _ea into the current address space. */
6913 if (strcmp (name, "._ea") == 0)
6914 return SECTION_WRITE | SECTION_DEBUG;
6915 return default_section_type_flags (decl, name, reloc);
6918 /* Implement targetm.select_section. */
6919 static section *
6920 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6922 /* Variables and constants defined in the __ea address space
6923 go into a special section named "._ea". */
6924 if (TREE_TYPE (decl) != error_mark_node
6925 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6927 /* We might get called with string constants, but get_named_section
6928 doesn't like them as they are not DECLs. Also, we need to set
6929 flags in that case. */
6930 if (!DECL_P (decl))
6931 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6933 return get_named_section (decl, "._ea", reloc);
6936 return default_elf_select_section (decl, reloc, align);
6939 /* Implement targetm.unique_section. */
6940 static void
6941 spu_unique_section (tree decl, int reloc)
6943 /* We don't support unique section names in the __ea address
6944 space for now. */
6945 if (TREE_TYPE (decl) != error_mark_node
6946 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6947 return;
6949 default_unique_section (decl, reloc);
6952 /* Generate a constant or register which contains 2^SCALE. We assume
6953 the result is valid for MODE. Currently, MODE must be V4SFmode and
6954 SCALE must be SImode. */
6955 rtx
6956 spu_gen_exp2 (machine_mode mode, rtx scale)
6958 gcc_assert (mode == V4SFmode);
6959 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6960 if (GET_CODE (scale) != CONST_INT)
6962 /* unsigned int exp = (127 + scale) << 23;
6963 __vector float m = (__vector float) spu_splats (exp); */
6964 rtx reg = force_reg (SImode, scale);
6965 rtx exp = gen_reg_rtx (SImode);
6966 rtx mul = gen_reg_rtx (mode);
6967 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6968 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6969 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6970 return mul;
6972 else
6974 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6975 unsigned char arr[16];
6976 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6977 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6978 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6979 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6980 return array_to_constant (mode, arr);
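/* A standalone, guarded-out check of the constant path above: the per-lane
   bytes arr[0..3] are the most-significant-first image of
   (127 + scale) << 23, which is the IEEE-754 single-precision bit
   pattern of 2**scale (for exponents that stay in range).  Assumes the
   host uses IEEE-754 floats.  */
#if 0
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int
main (void)
{
  int scale = 3;
  uint32_t exp = 127 + scale;

  unsigned char arr[4];
  arr[0] = exp >> 1;                    /* same byte recipe as above */
  arr[1] = (unsigned char) (exp << 7);
  arr[2] = arr[3] = 0;

  uint32_t word = ((uint32_t) arr[0] << 24) | ((uint32_t) arr[1] << 16);
  float f;
  memcpy (&f, &word, sizeof f);         /* reinterpret the bit pattern */
  printf ("2^%d = %g\n", scale, f);     /* prints 8 */
  return 0;
}
#endif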
6984 /* After reload, just change the convert into a move instruction
6985 or a dead instruction. */
6986 void
6987 spu_split_convert (rtx ops[])
6989 if (REGNO (ops[0]) == REGNO (ops[1]))
6990 emit_note (NOTE_INSN_DELETED);
6991 else
6993 /* Always use TImode, as this might help hard reg copyprop. */
6994 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6995 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6996 emit_insn (gen_move_insn (op0, op1));
7000 void
7001 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7003 fprintf (file, "# profile\n");
7004 fprintf (file, "brsl $75, _mcount\n");
7007 /* Implement targetm.ref_may_alias_errno. */
7008 static bool
7009 spu_ref_may_alias_errno (ao_ref *ref)
7011 tree base = ao_ref_base (ref);
7013 /* With SPU newlib, errno is defined as something like
7014 _impure_data._errno.
7015 The default implementation of this target macro does not
7016 recognize such expressions, so special-case them here. */
7018 if (TREE_CODE (base) == VAR_DECL
7019 && !TREE_STATIC (base)
7020 && DECL_EXTERNAL (base)
7021 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7022 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7023 "_impure_data") == 0
7024 /* _errno is the first member of _impure_data. */
7025 && ref->offset == 0)
7026 return true;
7028 return default_ref_may_alias_errno (ref);
7031 /* Output thunk to FILE that implements a C++ virtual function call (with
7032 multiple inheritance) to FUNCTION. The thunk adjusts the 'this' pointer
7033 by DELTA and, unless VCALL_OFFSET is zero, applies an additional adjustment
7034 read from offset VCALL_OFFSET in the vtable, whose address is stored at
7035 offset 0 from the adjusted 'this' pointer. */
7037 static void
7038 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7039 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7040 tree function)
7042 rtx op[8];
7044 /* Make sure unwind info is emitted for the thunk if needed. */
7045 final_start_function (emit_barrier (), file, 1);
7047 /* Operand 0 is the target function. */
7048 op[0] = XEXP (DECL_RTL (function), 0);
7050 /* Operand 1 is the 'this' pointer. */
7051 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7052 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7053 else
7054 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7056 /* Operands 2/3 are the low/high halfwords of delta. */
7057 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7058 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7060 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7061 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7062 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7064 /* Operands 6/7 are temporary registers. */
7065 op[6] = gen_rtx_REG (Pmode, 79);
7066 op[7] = gen_rtx_REG (Pmode, 78);
7068 /* Add DELTA to this pointer. */
7069 if (delta)
7071 if (delta >= -0x200 && delta < 0x200)
7072 output_asm_insn ("ai\t%1,%1,%2", op);
7073 else if (delta >= -0x8000 && delta < 0x8000)
7075 output_asm_insn ("il\t%6,%2", op);
7076 output_asm_insn ("a\t%1,%1,%6", op);
7078 else
7080 output_asm_insn ("ilhu\t%6,%3", op);
7081 output_asm_insn ("iohl\t%6,%2", op);
7082 output_asm_insn ("a\t%1,%1,%6", op);
7086 /* Perform vcall adjustment. */
7087 if (vcall_offset)
7089 output_asm_insn ("lqd\t%7,0(%1)", op);
7090 output_asm_insn ("rotqby\t%7,%7,%1", op);
7092 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7093 output_asm_insn ("ai\t%7,%7,%4", op);
7094 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7096 output_asm_insn ("il\t%6,%4", op);
7097 output_asm_insn ("a\t%7,%7,%6", op);
7099 else
7101 output_asm_insn ("ilhu\t%6,%5", op);
7102 output_asm_insn ("iohl\t%6,%4", op);
7103 output_asm_insn ("a\t%7,%7,%6", op);
7106 output_asm_insn ("lqd\t%6,0(%7)", op);
7107 output_asm_insn ("rotqby\t%6,%6,%7", op);
7108 output_asm_insn ("a\t%1,%1,%6", op);
7111 /* Jump to target. */
7112 output_asm_insn ("br\t%0", op);
7114 final_end_function ();
7117 /* Canonicalize a comparison from one we don't have to one we do have. */
7118 static void
7119 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7120 bool op0_preserve_value)
7122 if (!op0_preserve_value
7123 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7125 rtx tem = *op0;
7126 *op0 = *op1;
7127 *op1 = tem;
7128 *code = (int)swap_condition ((enum rtx_code)*code);
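/* A standalone, guarded-out sketch of the canonicalization above: when
   the first operand need not be preserved, LT/LE/LTU/LEU are replaced
   by the swapped GT/GE/GTU/GEU forms.  The enum and swap_cmp helper
   are illustrative stand-ins for rtx codes and swap_condition.  */
#if 0
#include <stdio.h>

enum cmp { LT_, LE_, GT_, GE_ };

static enum cmp
swap_cmp (enum cmp c)
{
  switch (c)
    {
    case LT_: return GT_;
    case LE_: return GE_;
    case GT_: return LT_;
    case GE_: return LE_;
    }
  return c;
}

int
main (void)
{
  int op0 = 1, op1 = 2;
  enum cmp code = LT_;          /* op0 < op1 */

  /* Swap the operands and the condition; the comparison is unchanged.  */
  int tmp = op0;
  op0 = op1;
  op1 = tmp;
  code = swap_cmp (code);

  printf ("code=%d op0=%d op1=%d\n", (int) code, op0, op1);  /* GT_ 2 1 */
  return 0;
}
#endif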
7132 /* Table of machine attributes. */
7133 static const struct attribute_spec spu_attribute_table[] =
7135 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7136 affects_type_identity } */
7137 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7138 false },
7139 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7140 false },
7141 { NULL, 0, 0, false, false, false, NULL, false }
7144 /* TARGET overrides. */
7146 #undef TARGET_ADDR_SPACE_POINTER_MODE
7147 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7149 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7150 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7152 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7153 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7154 spu_addr_space_legitimate_address_p
7156 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7157 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7159 #undef TARGET_ADDR_SPACE_SUBSET_P
7160 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7162 #undef TARGET_ADDR_SPACE_CONVERT
7163 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7165 #undef TARGET_INIT_BUILTINS
7166 #define TARGET_INIT_BUILTINS spu_init_builtins
7167 #undef TARGET_BUILTIN_DECL
7168 #define TARGET_BUILTIN_DECL spu_builtin_decl
7170 #undef TARGET_EXPAND_BUILTIN
7171 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7173 #undef TARGET_UNWIND_WORD_MODE
7174 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7176 #undef TARGET_LEGITIMIZE_ADDRESS
7177 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7179 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7180 and .quad for the debugger. When it is known that the assembler is fixed,
7181 these can be removed. */
7182 #undef TARGET_ASM_UNALIGNED_SI_OP
7183 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7185 #undef TARGET_ASM_ALIGNED_DI_OP
7186 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7188 /* The .8byte directive doesn't seem to work well for a 32-bit
7189 architecture. */
7190 #undef TARGET_ASM_UNALIGNED_DI_OP
7191 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7193 #undef TARGET_RTX_COSTS
7194 #define TARGET_RTX_COSTS spu_rtx_costs
7196 #undef TARGET_ADDRESS_COST
7197 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7199 #undef TARGET_SCHED_ISSUE_RATE
7200 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7202 #undef TARGET_SCHED_INIT_GLOBAL
7203 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7205 #undef TARGET_SCHED_INIT
7206 #define TARGET_SCHED_INIT spu_sched_init
7208 #undef TARGET_SCHED_VARIABLE_ISSUE
7209 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7211 #undef TARGET_SCHED_REORDER
7212 #define TARGET_SCHED_REORDER spu_sched_reorder
7214 #undef TARGET_SCHED_REORDER2
7215 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7217 #undef TARGET_SCHED_ADJUST_COST
7218 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7220 #undef TARGET_ATTRIBUTE_TABLE
7221 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7223 #undef TARGET_ASM_INTEGER
7224 #define TARGET_ASM_INTEGER spu_assemble_integer
7226 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7227 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7229 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7230 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7232 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7233 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7235 #undef TARGET_ASM_GLOBALIZE_LABEL
7236 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7238 #undef TARGET_PASS_BY_REFERENCE
7239 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7241 #undef TARGET_FUNCTION_ARG
7242 #define TARGET_FUNCTION_ARG spu_function_arg
7244 #undef TARGET_FUNCTION_ARG_ADVANCE
7245 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7247 #undef TARGET_MUST_PASS_IN_STACK
7248 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7250 #undef TARGET_BUILD_BUILTIN_VA_LIST
7251 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7253 #undef TARGET_EXPAND_BUILTIN_VA_START
7254 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7256 #undef TARGET_SETUP_INCOMING_VARARGS
7257 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7259 #undef TARGET_MACHINE_DEPENDENT_REORG
7260 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7262 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7263 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7265 #undef TARGET_INIT_LIBFUNCS
7266 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7268 #undef TARGET_RETURN_IN_MEMORY
7269 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7271 #undef TARGET_ENCODE_SECTION_INFO
7272 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7274 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7275 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7277 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7278 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7280 #undef TARGET_VECTORIZE_INIT_COST
7281 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7283 #undef TARGET_VECTORIZE_ADD_STMT_COST
7284 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7286 #undef TARGET_VECTORIZE_FINISH_COST
7287 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7289 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7290 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7292 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7293 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7295 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7296 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7298 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7299 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7301 #undef TARGET_SCHED_SMS_RES_MII
7302 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7304 #undef TARGET_SECTION_TYPE_FLAGS
7305 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7307 #undef TARGET_ASM_SELECT_SECTION
7308 #define TARGET_ASM_SELECT_SECTION spu_select_section
7310 #undef TARGET_ASM_UNIQUE_SECTION
7311 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7313 #undef TARGET_LEGITIMATE_ADDRESS_P
7314 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7316 #undef TARGET_LEGITIMATE_CONSTANT_P
7317 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7319 #undef TARGET_TRAMPOLINE_INIT
7320 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7322 #undef TARGET_WARN_FUNC_RETURN
7323 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7325 #undef TARGET_OPTION_OVERRIDE
7326 #define TARGET_OPTION_OVERRIDE spu_option_override
7328 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7329 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7331 #undef TARGET_REF_MAY_ALIAS_ERRNO
7332 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7334 #undef TARGET_ASM_OUTPUT_MI_THUNK
7335 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7336 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7337 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7339 /* Variable tracking should be run after all optimizations which
7340 change the order of insns. It also needs a valid CFG. */
7341 #undef TARGET_DELAY_VARTRACK
7342 #define TARGET_DELAY_VARTRACK true
7344 #undef TARGET_CANONICALIZE_COMPARISON
7345 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7347 #undef TARGET_CAN_USE_DOLOOP_P
7348 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7350 struct gcc_target targetm = TARGET_INITIALIZER;
7352 #include "gt-spu.h"