gcc/config/spu/spu.c
1 /* Copyright (C) 2006-2013 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
27 #include "flags.h"
28 #include "recog.h"
29 #include "obstack.h"
30 #include "tree.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "except.h"
34 #include "function.h"
35 #include "output.h"
36 #include "basic-block.h"
37 #include "diagnostic-core.h"
38 #include "ggc.h"
39 #include "hashtab.h"
40 #include "tm_p.h"
41 #include "target.h"
42 #include "target-def.h"
43 #include "langhooks.h"
44 #include "reload.h"
45 #include "sched-int.h"
46 #include "params.h"
47 #include "machmode.h"
48 #include "gimple.h"
49 #include "gimplify.h"
50 #include "tm-constrs.h"
51 #include "ddg.h"
52 #include "sbitmap.h"
53 #include "timevar.h"
54 #include "df.h"
55 #include "dumpfile.h"
56 #include "cfgloop.h"
58 /* Builtin types, data and prototypes. */
60 enum spu_builtin_type_index
62 SPU_BTI_END_OF_PARAMS,
64 /* We create new type nodes for these. */
65 SPU_BTI_V16QI,
66 SPU_BTI_V8HI,
67 SPU_BTI_V4SI,
68 SPU_BTI_V2DI,
69 SPU_BTI_V4SF,
70 SPU_BTI_V2DF,
71 SPU_BTI_UV16QI,
72 SPU_BTI_UV8HI,
73 SPU_BTI_UV4SI,
74 SPU_BTI_UV2DI,
76 /* A 16-byte type. (Implemented with V16QI_type_node) */
77 SPU_BTI_QUADWORD,
79 /* These all correspond to intSI_type_node */
80 SPU_BTI_7,
81 SPU_BTI_S7,
82 SPU_BTI_U7,
83 SPU_BTI_S10,
84 SPU_BTI_S10_4,
85 SPU_BTI_U14,
86 SPU_BTI_16,
87 SPU_BTI_S16,
88 SPU_BTI_S16_2,
89 SPU_BTI_U16,
90 SPU_BTI_U16_2,
91 SPU_BTI_U18,
93 /* These correspond to the standard types */
94 SPU_BTI_INTQI,
95 SPU_BTI_INTHI,
96 SPU_BTI_INTSI,
97 SPU_BTI_INTDI,
99 SPU_BTI_UINTQI,
100 SPU_BTI_UINTHI,
101 SPU_BTI_UINTSI,
102 SPU_BTI_UINTDI,
104 SPU_BTI_FLOAT,
105 SPU_BTI_DOUBLE,
107 SPU_BTI_VOID,
108 SPU_BTI_PTR,
110 SPU_BTI_MAX
113 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
114 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
115 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
116 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
117 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
118 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
119 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
120 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
121 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
122 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126 struct spu_builtin_range
128 int low, high;
131 static struct spu_builtin_range spu_builtin_range[] = {
132 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
133 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
134 {0ll, 0x7fll}, /* SPU_BTI_U7 */
135 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
136 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
137 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
138 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
139 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
140 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
141 {0ll, 0xffffll}, /* SPU_BTI_U16 */
142 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
147 /* Target specific attribute specifications. */
148 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150 /* Prototypes and external defs. */
151 static int get_pipe (rtx insn);
152 static int spu_naked_function_p (tree func);
153 static int mem_is_padded_component_ref (rtx x);
154 static void fix_range (const char *);
155 static rtx spu_expand_load (rtx, rtx, rtx, int);
157 /* Which instruction set architecture to use. */
158 int spu_arch;
/* Which CPU to tune for. */
160 int spu_tune;
/* The hardware requires 8 insns between a hint and the branch it
affects. This variable describes how many rtl instructions the
compiler needs to see before inserting a hint, and then the compiler
will insert enough nops to make it at least 8 insns. The default is
for the compiler to allow up to 2 nops to be emitted. The nops are
inserted in pairs, so we round down. */
168 int spu_hint_dist = (8*4) - (2*4);
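/* With the default of up to 2 nops, the hint must therefore be seen at
   least 8*4 - 2*4 = 24 bytes (6 insns) before the branch; the rest of
   the required distance is filled with nops.  */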
170 enum spu_immediate {
171 SPU_NONE,
172 SPU_IL,
173 SPU_ILA,
174 SPU_ILH,
175 SPU_ILHU,
176 SPU_ORI,
177 SPU_ORHI,
178 SPU_ORBI,
179 SPU_IOHL
181 enum immediate_class
183 IC_POOL, /* constant pool */
184 IC_IL1, /* one il* instruction */
185 IC_IL2, /* both ilhu and iohl instructions */
186 IC_IL1s, /* one il* instruction */
187 IC_IL2s, /* both ilhu and iohl instructions */
188 IC_FSMBI, /* the fsmbi instruction */
189 IC_CPAT, /* one of the c*d instructions */
190 IC_FSMBI2 /* fsmbi plus 1 other instruction */
193 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
194 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
195 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
196 static enum immediate_class classify_immediate (rtx op,
197 enum machine_mode mode);
199 /* Pointer mode for __ea references. */
200 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
203 /* Define the structure for the machine field in struct function. */
204 struct GTY(()) machine_function
206 /* Register to use for PIC accesses. */
207 rtx pic_reg;
210 /* How to allocate a 'struct machine_function'. */
211 static struct machine_function *
212 spu_init_machine_status (void)
214 return ggc_alloc_cleared_machine_function ();
217 /* Implement TARGET_OPTION_OVERRIDE. */
218 static void
219 spu_option_override (void)
221 /* Set up function hooks. */
222 init_machine_status = spu_init_machine_status;
224 /* Small loops will be unpeeled at -O3. For SPU it is more important
225 to keep code small by default. */
226 if (!flag_unroll_loops && !flag_peel_loops)
227 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
228 global_options.x_param_values,
229 global_options_set.x_param_values);
231 flag_omit_frame_pointer = 1;
233 /* Functions must be 8 byte aligned so we correctly handle dual issue */
234 if (align_functions < 8)
235 align_functions = 8;
237 spu_hint_dist = 8*4 - spu_max_nops*4;
238 if (spu_hint_dist < 0)
239 spu_hint_dist = 0;
241 if (spu_fixed_range_string)
242 fix_range (spu_fixed_range_string);
244 /* Determine processor architectural level. */
245 if (spu_arch_string)
247 if (strcmp (&spu_arch_string[0], "cell") == 0)
248 spu_arch = PROCESSOR_CELL;
249 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
250 spu_arch = PROCESSOR_CELLEDP;
251 else
252 error ("bad value (%s) for -march= switch", spu_arch_string);
255 /* Determine processor to tune for. */
256 if (spu_tune_string)
258 if (strcmp (&spu_tune_string[0], "cell") == 0)
259 spu_tune = PROCESSOR_CELL;
260 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
261 spu_tune = PROCESSOR_CELLEDP;
262 else
263 error ("bad value (%s) for -mtune= switch", spu_tune_string);
266 /* Change defaults according to the processor architecture. */
267 if (spu_arch == PROCESSOR_CELLEDP)
269 /* If no command line option has been otherwise specified, change
270 the default to -mno-safe-hints on celledp -- only the original
271 Cell/B.E. processors require this workaround. */
272 if (!(target_flags_explicit & MASK_SAFE_HINTS))
273 target_flags &= ~MASK_SAFE_HINTS;
276 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
279 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
280 struct attribute_spec.handler. */
282 /* True if MODE is valid for the target. By "valid", we mean able to
283 be manipulated in non-trivial ways. In particular, this means all
284 the arithmetic is supported. */
285 static bool
286 spu_scalar_mode_supported_p (enum machine_mode mode)
288 switch (mode)
290 case QImode:
291 case HImode:
292 case SImode:
293 case SFmode:
294 case DImode:
295 case TImode:
296 case DFmode:
297 return true;
299 default:
300 return false;
304 /* Similarly for vector modes. "Supported" here is less strict. At
305 least some operations are supported; need to check optabs or builtins
306 for further details. */
307 static bool
308 spu_vector_mode_supported_p (enum machine_mode mode)
310 switch (mode)
312 case V16QImode:
313 case V8HImode:
314 case V4SImode:
315 case V2DImode:
316 case V4SFmode:
317 case V2DFmode:
318 return true;
320 default:
321 return false;
325 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
326 least significant bytes of the outer mode. This function returns
TRUE for the SUBREGs where this is correct. */
329 valid_subreg (rtx op)
331 enum machine_mode om = GET_MODE (op);
332 enum machine_mode im = GET_MODE (SUBREG_REG (op));
333 return om != VOIDmode && im != VOIDmode
334 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
335 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
336 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
and adjust the start offset. */
341 static rtx
342 adjust_operand (rtx op, HOST_WIDE_INT * start)
344 enum machine_mode mode;
345 int op_size;
346 /* Strip any paradoxical SUBREG. */
347 if (GET_CODE (op) == SUBREG
348 && (GET_MODE_BITSIZE (GET_MODE (op))
349 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
351 if (start)
352 *start -=
353 GET_MODE_BITSIZE (GET_MODE (op)) -
354 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
355 op = SUBREG_REG (op);
/* If it is smaller than SI, ensure a SUBREG. */
358 op_size = GET_MODE_BITSIZE (GET_MODE (op));
359 if (op_size < 32)
361 if (start)
362 *start += 32 - op_size;
363 op_size = 32;
365 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
366 mode = mode_for_size (op_size, MODE_INT, 0);
367 if (mode != GET_MODE (op))
368 op = gen_rtx_SUBREG (mode, op, 0);
369 return op;
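/* Expand an extv/extzv pattern: extract the WIDTH-bit field at bit
   offset START of ops[1] into the TImode register ops[0], zero- or
   sign-extending according to UNSIGNEDP.  The field is rotated to the
   top of a TImode value and then shifted right by 128 - WIDTH; e.g.
   for a register source, a 32-bit field at bit 40 is rotated left by
   40 and then shifted right by 96.  */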
372 void
373 spu_expand_extv (rtx ops[], int unsignedp)
375 rtx dst = ops[0], src = ops[1];
376 HOST_WIDE_INT width = INTVAL (ops[2]);
377 HOST_WIDE_INT start = INTVAL (ops[3]);
378 HOST_WIDE_INT align_mask;
379 rtx s0, s1, mask, r0;
381 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
383 if (MEM_P (src))
385 /* First, determine if we need 1 TImode load or 2. We need only 1
386 if the bits being extracted do not cross the alignment boundary
387 as determined by the MEM and its address. */
389 align_mask = -MEM_ALIGN (src);
390 if ((start & align_mask) == ((start + width - 1) & align_mask))
392 /* Alignment is sufficient for 1 load. */
393 s0 = gen_reg_rtx (TImode);
394 r0 = spu_expand_load (s0, 0, src, start / 8);
395 start &= 7;
396 if (r0)
397 emit_insn (gen_rotqby_ti (s0, s0, r0));
399 else
401 /* Need 2 loads. */
402 s0 = gen_reg_rtx (TImode);
403 s1 = gen_reg_rtx (TImode);
404 r0 = spu_expand_load (s0, s1, src, start / 8);
405 start &= 7;
407 gcc_assert (start + width <= 128);
408 if (r0)
410 rtx r1 = gen_reg_rtx (SImode);
411 mask = gen_reg_rtx (TImode);
412 emit_move_insn (mask, GEN_INT (-1));
413 emit_insn (gen_rotqby_ti (s0, s0, r0));
414 emit_insn (gen_rotqby_ti (s1, s1, r0));
415 if (GET_CODE (r0) == CONST_INT)
416 r1 = GEN_INT (INTVAL (r0) & 15);
417 else
418 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
419 emit_insn (gen_shlqby_ti (mask, mask, r1));
420 emit_insn (gen_selb (s0, s1, s0, mask));
425 else if (GET_CODE (src) == SUBREG)
427 rtx r = SUBREG_REG (src);
428 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
429 s0 = gen_reg_rtx (TImode);
430 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
431 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
432 else
433 emit_move_insn (s0, src);
435 else
437 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
438 s0 = gen_reg_rtx (TImode);
439 emit_move_insn (s0, src);
442 /* Now s0 is TImode and contains the bits to extract at start. */
444 if (start)
445 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
447 if (128 - width)
448 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
450 emit_move_insn (dst, s0);
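/* Expand an insv pattern: insert the WIDTH-bit value ops[3] at bit
   offset START of ops[0], which may be a register or a MEM.  */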
453 void
454 spu_expand_insv (rtx ops[])
456 HOST_WIDE_INT width = INTVAL (ops[1]);
457 HOST_WIDE_INT start = INTVAL (ops[2]);
458 HOST_WIDE_INT maskbits;
459 enum machine_mode dst_mode;
460 rtx dst = ops[0], src = ops[3];
461 int dst_size;
462 rtx mask;
463 rtx shift_reg;
464 int shift;
467 if (GET_CODE (ops[0]) == MEM)
468 dst = gen_reg_rtx (TImode);
469 else
470 dst = adjust_operand (dst, &start);
471 dst_mode = GET_MODE (dst);
472 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
474 if (CONSTANT_P (src))
476 enum machine_mode m =
477 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
478 src = force_reg (m, convert_to_mode (m, src, 0));
480 src = adjust_operand (src, 0);
482 mask = gen_reg_rtx (dst_mode);
483 shift_reg = gen_reg_rtx (dst_mode);
484 shift = dst_size - start - width;
486 /* It's not safe to use subreg here because the compiler assumes
487 that the SUBREG_REG is right justified in the SUBREG. */
488 convert_move (shift_reg, src, 1);
490 if (shift > 0)
492 switch (dst_mode)
494 case SImode:
495 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
496 break;
497 case DImode:
498 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
499 break;
500 case TImode:
501 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
502 break;
503 default:
504 abort ();
507 else if (shift < 0)
508 abort ();
510 switch (dst_size)
512 case 32:
513 maskbits = (-1ll << (32 - width - start));
514 if (start)
515 maskbits += (1ll << (32 - start));
516 emit_move_insn (mask, GEN_INT (maskbits));
517 break;
518 case 64:
519 maskbits = (-1ll << (64 - width - start));
520 if (start)
521 maskbits += (1ll << (64 - start));
522 emit_move_insn (mask, GEN_INT (maskbits));
523 break;
524 case 128:
526 unsigned char arr[16];
527 int i = start / 8;
528 memset (arr, 0, sizeof (arr));
529 arr[i] = 0xff >> (start & 7);
530 for (i++; i <= (start + width - 1) / 8; i++)
531 arr[i] = 0xff;
532 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
533 emit_move_insn (mask, array_to_constant (TImode, arr));
535 break;
536 default:
537 abort ();
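/* The mask now has 1 bits exactly where the new field goes.  For
   example, with a 32-bit destination, START == 8 and WIDTH == 8 give
   maskbits == 0x00ff0000; with a 128-bit destination, START == 12 and
   WIDTH == 8 set arr[1] = 0x0f and arr[2] = 0xf0.  */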
539 if (GET_CODE (ops[0]) == MEM)
541 rtx low = gen_reg_rtx (SImode);
542 rtx rotl = gen_reg_rtx (SImode);
543 rtx mask0 = gen_reg_rtx (TImode);
544 rtx addr;
545 rtx addr0;
546 rtx addr1;
547 rtx mem;
549 addr = force_reg (Pmode, XEXP (ops[0], 0));
550 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
551 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
552 emit_insn (gen_negsi2 (rotl, low));
553 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
554 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
555 mem = change_address (ops[0], TImode, addr0);
556 set_mem_alias_set (mem, 0);
557 emit_move_insn (dst, mem);
558 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
559 if (start + width > MEM_ALIGN (ops[0]))
561 rtx shl = gen_reg_rtx (SImode);
562 rtx mask1 = gen_reg_rtx (TImode);
563 rtx dst1 = gen_reg_rtx (TImode);
564 rtx mem1;
565 addr1 = plus_constant (Pmode, addr, 16);
566 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
567 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
568 emit_insn (gen_shlqby_ti (mask1, mask, shl));
569 mem1 = change_address (ops[0], TImode, addr1);
570 set_mem_alias_set (mem1, 0);
571 emit_move_insn (dst1, mem1);
572 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
573 emit_move_insn (mem1, dst1);
575 emit_move_insn (mem, dst);
577 else
578 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
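/* Expand a block move of ops[2] bytes (a CONST_INT) from ops[1] to
   ops[0] with alignment ops[3].  Only 16-byte-aligned copies no larger
   than MOVE_RATIO * 8 bytes are expanded here; return 1 when code was
   emitted and 0 to fall back to the generic block mover.  */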
583 spu_expand_block_move (rtx ops[])
585 HOST_WIDE_INT bytes, align, offset;
586 rtx src, dst, sreg, dreg, target;
587 int i;
588 if (GET_CODE (ops[2]) != CONST_INT
589 || GET_CODE (ops[3]) != CONST_INT
590 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
591 return 0;
593 bytes = INTVAL (ops[2]);
594 align = INTVAL (ops[3]);
596 if (bytes <= 0)
597 return 1;
599 dst = ops[0];
600 src = ops[1];
602 if (align == 16)
604 for (offset = 0; offset + 16 <= bytes; offset += 16)
606 dst = adjust_address (ops[0], V16QImode, offset);
607 src = adjust_address (ops[1], V16QImode, offset);
608 emit_move_insn (dst, src);
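/* A tail of fewer than 16 bytes is merged into the destination with
   selb under a byte mask below; e.g. a 20-byte copy does one V16QI
   move and then a selb whose mask selects only the first 4 bytes from
   the source.  */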
610 if (offset < bytes)
612 rtx mask;
613 unsigned char arr[16] = { 0 };
614 for (i = 0; i < bytes - offset; i++)
615 arr[i] = 0xff;
616 dst = adjust_address (ops[0], V16QImode, offset);
617 src = adjust_address (ops[1], V16QImode, offset);
618 mask = gen_reg_rtx (V16QImode);
619 sreg = gen_reg_rtx (V16QImode);
620 dreg = gen_reg_rtx (V16QImode);
621 target = gen_reg_rtx (V16QImode);
622 emit_move_insn (mask, array_to_constant (V16QImode, arr));
623 emit_move_insn (dreg, dst);
624 emit_move_insn (sreg, src);
625 emit_insn (gen_selb (target, dreg, sreg, mask));
626 emit_move_insn (dst, target);
628 return 1;
630 return 0;
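/* Comparison operations the SPU implements directly.  spu_comp_icode
   maps a mode index and an spu_comp_code to the insn that computes
   the comparison; the floating-point rows have no unsigned compare,
   hence the 0 entries.  */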
633 enum spu_comp_code
634 { SPU_EQ, SPU_GT, SPU_GTU };
636 int spu_comp_icode[12][3] = {
637 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
638 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
639 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
640 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
641 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
642 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
643 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
644 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
645 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
646 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
647 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
648 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
/* Generate a compare for CODE. Return a brand-new rtx that represents
the result of the compare. GCC can figure this out too if we don't
provide all variations of compares, but since GCC always wants to use
WORD_MODE, we can generate better code in most cases if we do it
ourselves. */
656 void
657 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
659 int reverse_compare = 0;
660 int reverse_test = 0;
661 rtx compare_result, eq_result;
662 rtx comp_rtx, eq_rtx;
663 enum machine_mode comp_mode;
664 enum machine_mode op_mode;
665 enum spu_comp_code scode, eq_code;
666 enum insn_code ior_code;
667 enum rtx_code code = GET_CODE (cmp);
668 rtx op0 = XEXP (cmp, 0);
669 rtx op1 = XEXP (cmp, 1);
670 int index;
671 int eq_test = 0;
673 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
674 and so on, to keep the constant in operand 1. */
675 if (GET_CODE (op1) == CONST_INT)
677 HOST_WIDE_INT val = INTVAL (op1) - 1;
678 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
679 switch (code)
681 case GE:
682 op1 = GEN_INT (val);
683 code = GT;
684 break;
685 case LT:
686 op1 = GEN_INT (val);
687 code = LE;
688 break;
689 case GEU:
690 op1 = GEN_INT (val);
691 code = GTU;
692 break;
693 case LTU:
694 op1 = GEN_INT (val);
695 code = LEU;
696 break;
697 default:
698 break;
702 /* However, if we generate an integer result, performing a reverse test
703 would require an extra negation, so avoid that where possible. */
704 if (GET_CODE (op1) == CONST_INT && is_set == 1)
706 HOST_WIDE_INT val = INTVAL (op1) + 1;
707 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
708 switch (code)
710 case LE:
711 op1 = GEN_INT (val);
712 code = LT;
713 break;
714 case LEU:
715 op1 = GEN_INT (val);
716 code = LTU;
717 break;
718 default:
719 break;
723 comp_mode = SImode;
724 op_mode = GET_MODE (op0);
726 switch (code)
728 case GE:
729 scode = SPU_GT;
730 if (HONOR_NANS (op_mode))
732 reverse_compare = 0;
733 reverse_test = 0;
734 eq_test = 1;
735 eq_code = SPU_EQ;
737 else
739 reverse_compare = 1;
740 reverse_test = 1;
742 break;
743 case LE:
744 scode = SPU_GT;
745 if (HONOR_NANS (op_mode))
747 reverse_compare = 1;
748 reverse_test = 0;
749 eq_test = 1;
750 eq_code = SPU_EQ;
752 else
754 reverse_compare = 0;
755 reverse_test = 1;
757 break;
758 case LT:
759 reverse_compare = 1;
760 reverse_test = 0;
761 scode = SPU_GT;
762 break;
763 case GEU:
764 reverse_compare = 1;
765 reverse_test = 1;
766 scode = SPU_GTU;
767 break;
768 case LEU:
769 reverse_compare = 0;
770 reverse_test = 1;
771 scode = SPU_GTU;
772 break;
773 case LTU:
774 reverse_compare = 1;
775 reverse_test = 0;
776 scode = SPU_GTU;
777 break;
778 case NE:
779 reverse_compare = 0;
780 reverse_test = 1;
781 scode = SPU_EQ;
782 break;
784 case EQ:
785 scode = SPU_EQ;
786 break;
787 case GT:
788 scode = SPU_GT;
789 break;
790 case GTU:
791 scode = SPU_GTU;
792 break;
793 default:
794 scode = SPU_EQ;
795 break;
798 switch (op_mode)
800 case QImode:
801 index = 0;
802 comp_mode = QImode;
803 break;
804 case HImode:
805 index = 1;
806 comp_mode = HImode;
807 break;
808 case SImode:
809 index = 2;
810 break;
811 case DImode:
812 index = 3;
813 break;
814 case TImode:
815 index = 4;
816 break;
817 case SFmode:
818 index = 5;
819 break;
820 case DFmode:
821 index = 6;
822 break;
823 case V16QImode:
824 index = 7;
825 comp_mode = op_mode;
826 break;
827 case V8HImode:
828 index = 8;
829 comp_mode = op_mode;
830 break;
831 case V4SImode:
832 index = 9;
833 comp_mode = op_mode;
834 break;
835 case V4SFmode:
836 index = 10;
837 comp_mode = V4SImode;
838 break;
839 case V2DFmode:
840 index = 11;
841 comp_mode = V2DImode;
842 break;
843 case V2DImode:
844 default:
845 abort ();
848 if (GET_MODE (op1) == DFmode
849 && (scode != SPU_GT && scode != SPU_EQ))
850 abort ();
852 if (is_set == 0 && op1 == const0_rtx
853 && (GET_MODE (op0) == SImode
854 || GET_MODE (op0) == HImode
855 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
857 /* Don't need to set a register with the result when we are
858 comparing against zero and branching. */
859 reverse_test = !reverse_test;
860 compare_result = op0;
862 else
864 compare_result = gen_reg_rtx (comp_mode);
866 if (reverse_compare)
868 rtx t = op1;
869 op1 = op0;
870 op0 = t;
873 if (spu_comp_icode[index][scode] == 0)
874 abort ();
876 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
877 (op0, op_mode))
878 op0 = force_reg (op_mode, op0);
879 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
880 (op1, op_mode))
881 op1 = force_reg (op_mode, op1);
882 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
883 op0, op1);
884 if (comp_rtx == 0)
885 abort ();
886 emit_insn (comp_rtx);
888 if (eq_test)
890 eq_result = gen_reg_rtx (comp_mode);
891 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
892 op0, op1);
893 if (eq_rtx == 0)
894 abort ();
895 emit_insn (eq_rtx);
896 ior_code = optab_handler (ior_optab, comp_mode);
897 gcc_assert (ior_code != CODE_FOR_nothing);
898 emit_insn (GEN_FCN (ior_code)
899 (compare_result, compare_result, eq_result));
903 if (is_set == 0)
905 rtx bcomp;
906 rtx loc_ref;
908 /* We don't have branch on QI compare insns, so we convert the
909 QI compare result to a HI result. */
910 if (comp_mode == QImode)
912 rtx old_res = compare_result;
913 compare_result = gen_reg_rtx (HImode);
914 comp_mode = HImode;
915 emit_insn (gen_extendqihi2 (compare_result, old_res));
918 if (reverse_test)
919 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
920 else
921 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
923 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
924 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
925 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
926 loc_ref, pc_rtx)));
928 else if (is_set == 2)
930 rtx target = operands[0];
931 int compare_size = GET_MODE_BITSIZE (comp_mode);
932 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
933 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
934 rtx select_mask;
935 rtx op_t = operands[2];
936 rtx op_f = operands[3];
938 /* The result of the comparison can be SI, HI or QI mode. Create a
939 mask based on that result. */
940 if (target_size > compare_size)
942 select_mask = gen_reg_rtx (mode);
943 emit_insn (gen_extend_compare (select_mask, compare_result));
945 else if (target_size < compare_size)
946 select_mask =
947 gen_rtx_SUBREG (mode, compare_result,
948 (compare_size - target_size) / BITS_PER_UNIT);
949 else if (comp_mode != mode)
950 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
951 else
952 select_mask = compare_result;
954 if (GET_MODE (target) != GET_MODE (op_t)
955 || GET_MODE (target) != GET_MODE (op_f))
956 abort ();
958 if (reverse_test)
959 emit_insn (gen_selb (target, op_t, op_f, select_mask));
960 else
961 emit_insn (gen_selb (target, op_f, op_t, select_mask));
963 else
965 rtx target = operands[0];
966 if (reverse_test)
967 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
968 gen_rtx_NOT (comp_mode, compare_result)));
969 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
970 emit_insn (gen_extendhisi2 (target, compare_result));
971 else if (GET_MODE (target) == SImode
972 && GET_MODE (compare_result) == QImode)
973 emit_insn (gen_extend_compare (target, compare_result));
974 else
975 emit_move_insn (target, compare_result);
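/* Return the target-format bit pattern of the SFmode or DFmode
   constant X as a HOST_WIDE_INT.  */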
979 HOST_WIDE_INT
980 const_double_to_hwint (rtx x)
982 HOST_WIDE_INT val;
983 REAL_VALUE_TYPE rv;
984 if (GET_MODE (x) == SFmode)
986 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
987 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
989 else if (GET_MODE (x) == DFmode)
991 long l[2];
992 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
993 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
994 val = l[0];
995 val = (val << 32) | (l[1] & 0xffffffff);
997 else
998 abort ();
999 return val;
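/* Inverse of const_double_to_hwint: build an SFmode or DFmode
   CONST_DOUBLE whose target-format bit pattern is V.  */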
1003 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1005 long tv[2];
1006 REAL_VALUE_TYPE rv;
1007 gcc_assert (mode == SFmode || mode == DFmode);
1009 if (mode == SFmode)
1010 tv[0] = (v << 32) >> 32;
1011 else if (mode == DFmode)
1013 tv[1] = (v << 32) >> 32;
1014 tv[0] = v >> 32;
1016 real_from_target (&rv, tv, mode);
1017 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
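/* Output address ADDR to FILE in assembler syntax.  The AND with -16
   used for quadword-aligned accesses is stripped before printing.  */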
1020 void
1021 print_operand_address (FILE * file, register rtx addr)
1023 rtx reg;
1024 rtx offset;
1026 if (GET_CODE (addr) == AND
1027 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1028 && INTVAL (XEXP (addr, 1)) == -16)
1029 addr = XEXP (addr, 0);
1031 switch (GET_CODE (addr))
1033 case REG:
1034 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1035 break;
1037 case PLUS:
1038 reg = XEXP (addr, 0);
1039 offset = XEXP (addr, 1);
1040 if (GET_CODE (offset) == REG)
1042 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1043 reg_names[REGNO (offset)]);
1045 else if (GET_CODE (offset) == CONST_INT)
1047 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1048 INTVAL (offset), reg_names[REGNO (reg)]);
1050 else
1051 abort ();
1052 break;
1054 case CONST:
1055 case LABEL_REF:
1056 case SYMBOL_REF:
1057 case CONST_INT:
1058 output_addr_const (file, addr);
1059 break;
1061 default:
1062 debug_rtx (addr);
1063 abort ();
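/* Print operand X to FILE according to the format letter CODE
   (0 means no letter); the list of unused letters near the end of the
   function shows which codes are still free.  */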
1067 void
1068 print_operand (FILE * file, rtx x, int code)
1070 enum machine_mode mode = GET_MODE (x);
1071 HOST_WIDE_INT val;
1072 unsigned char arr[16];
1073 int xcode = GET_CODE (x);
1074 int i, info;
1075 if (GET_MODE (x) == VOIDmode)
1076 switch (code)
1078 case 'L': /* 128 bits, signed */
1079 case 'm': /* 128 bits, signed */
1080 case 'T': /* 128 bits, signed */
1081 case 't': /* 128 bits, signed */
1082 mode = TImode;
1083 break;
1084 case 'K': /* 64 bits, signed */
1085 case 'k': /* 64 bits, signed */
1086 case 'D': /* 64 bits, signed */
1087 case 'd': /* 64 bits, signed */
1088 mode = DImode;
1089 break;
1090 case 'J': /* 32 bits, signed */
1091 case 'j': /* 32 bits, signed */
1092 case 's': /* 32 bits, signed */
1093 case 'S': /* 32 bits, signed */
1094 mode = SImode;
1095 break;
1097 switch (code)
1100 case 'j': /* 32 bits, signed */
1101 case 'k': /* 64 bits, signed */
1102 case 'm': /* 128 bits, signed */
1103 if (xcode == CONST_INT
1104 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1106 gcc_assert (logical_immediate_p (x, mode));
1107 constant_to_array (mode, x, arr);
1108 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1109 val = trunc_int_for_mode (val, SImode);
1110 switch (which_logical_immediate (val))
1112 case SPU_ORI:
1113 break;
1114 case SPU_ORHI:
1115 fprintf (file, "h");
1116 break;
1117 case SPU_ORBI:
1118 fprintf (file, "b");
1119 break;
1120 default:
1121 gcc_unreachable();
1124 else
1125 gcc_unreachable();
1126 return;
1128 case 'J': /* 32 bits, signed */
1129 case 'K': /* 64 bits, signed */
1130 case 'L': /* 128 bits, signed */
1131 if (xcode == CONST_INT
1132 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1134 gcc_assert (logical_immediate_p (x, mode)
1135 || iohl_immediate_p (x, mode));
1136 constant_to_array (mode, x, arr);
1137 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1138 val = trunc_int_for_mode (val, SImode);
1139 switch (which_logical_immediate (val))
1141 case SPU_ORI:
1142 case SPU_IOHL:
1143 break;
1144 case SPU_ORHI:
1145 val = trunc_int_for_mode (val, HImode);
1146 break;
1147 case SPU_ORBI:
1148 val = trunc_int_for_mode (val, QImode);
1149 break;
1150 default:
1151 gcc_unreachable();
1153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1155 else
1156 gcc_unreachable();
1157 return;
1159 case 't': /* 128 bits, signed */
1160 case 'd': /* 64 bits, signed */
1161 case 's': /* 32 bits, signed */
1162 if (CONSTANT_P (x))
1164 enum immediate_class c = classify_immediate (x, mode);
1165 switch (c)
1167 case IC_IL1:
1168 constant_to_array (mode, x, arr);
1169 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1170 val = trunc_int_for_mode (val, SImode);
1171 switch (which_immediate_load (val))
1173 case SPU_IL:
1174 break;
1175 case SPU_ILA:
1176 fprintf (file, "a");
1177 break;
1178 case SPU_ILH:
1179 fprintf (file, "h");
1180 break;
1181 case SPU_ILHU:
1182 fprintf (file, "hu");
1183 break;
1184 default:
1185 gcc_unreachable ();
1187 break;
1188 case IC_CPAT:
1189 constant_to_array (mode, x, arr);
1190 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1191 if (info == 1)
1192 fprintf (file, "b");
1193 else if (info == 2)
1194 fprintf (file, "h");
1195 else if (info == 4)
1196 fprintf (file, "w");
1197 else if (info == 8)
1198 fprintf (file, "d");
1199 break;
1200 case IC_IL1s:
1201 if (xcode == CONST_VECTOR)
1203 x = CONST_VECTOR_ELT (x, 0);
1204 xcode = GET_CODE (x);
1206 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1207 fprintf (file, "a");
1208 else if (xcode == HIGH)
1209 fprintf (file, "hu");
1210 break;
1211 case IC_FSMBI:
1212 case IC_FSMBI2:
1213 case IC_IL2:
1214 case IC_IL2s:
1215 case IC_POOL:
1216 abort ();
1219 else
1220 gcc_unreachable ();
1221 return;
1223 case 'T': /* 128 bits, signed */
1224 case 'D': /* 64 bits, signed */
1225 case 'S': /* 32 bits, signed */
1226 if (CONSTANT_P (x))
1228 enum immediate_class c = classify_immediate (x, mode);
1229 switch (c)
1231 case IC_IL1:
1232 constant_to_array (mode, x, arr);
1233 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1234 val = trunc_int_for_mode (val, SImode);
1235 switch (which_immediate_load (val))
1237 case SPU_IL:
1238 case SPU_ILA:
1239 break;
1240 case SPU_ILH:
1241 case SPU_ILHU:
1242 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1243 break;
1244 default:
1245 gcc_unreachable ();
1247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1248 break;
1249 case IC_FSMBI:
1250 constant_to_array (mode, x, arr);
1251 val = 0;
1252 for (i = 0; i < 16; i++)
1254 val <<= 1;
1255 val |= arr[i] & 1;
1257 print_operand (file, GEN_INT (val), 0);
1258 break;
1259 case IC_CPAT:
1260 constant_to_array (mode, x, arr);
1261 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1263 break;
1264 case IC_IL1s:
1265 if (xcode == HIGH)
1266 x = XEXP (x, 0);
1267 if (GET_CODE (x) == CONST_VECTOR)
1268 x = CONST_VECTOR_ELT (x, 0);
1269 output_addr_const (file, x);
1270 if (xcode == HIGH)
1271 fprintf (file, "@h");
1272 break;
1273 case IC_IL2:
1274 case IC_IL2s:
1275 case IC_FSMBI2:
1276 case IC_POOL:
1277 abort ();
1280 else
1281 gcc_unreachable ();
1282 return;
1284 case 'C':
1285 if (xcode == CONST_INT)
/* Only the 4 least significant bits are relevant for the
generate-controls (c*d) instructions. */
1289 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1290 return;
1292 break;
1294 case 'M': /* print code for c*d */
1295 if (GET_CODE (x) == CONST_INT)
1296 switch (INTVAL (x))
1298 case 1:
1299 fprintf (file, "b");
1300 break;
1301 case 2:
1302 fprintf (file, "h");
1303 break;
1304 case 4:
1305 fprintf (file, "w");
1306 break;
1307 case 8:
1308 fprintf (file, "d");
1309 break;
1310 default:
1311 gcc_unreachable();
1313 else
1314 gcc_unreachable();
1315 return;
1317 case 'N': /* Negate the operand */
1318 if (xcode == CONST_INT)
1319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1320 else if (xcode == CONST_VECTOR)
1321 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1322 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1323 return;
1325 case 'I': /* enable/disable interrupts */
1326 if (xcode == CONST_INT)
1327 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1328 return;
1330 case 'b': /* branch modifiers */
1331 if (xcode == REG)
1332 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1333 else if (COMPARISON_P (x))
1334 fprintf (file, "%s", xcode == NE ? "n" : "");
1335 return;
1337 case 'i': /* indirect call */
1338 if (xcode == MEM)
1340 if (GET_CODE (XEXP (x, 0)) == REG)
1341 /* Used in indirect function calls. */
1342 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1343 else
1344 output_address (XEXP (x, 0));
1346 return;
1348 case 'p': /* load/store */
1349 if (xcode == MEM)
1351 x = XEXP (x, 0);
1352 xcode = GET_CODE (x);
1354 if (xcode == AND)
1356 x = XEXP (x, 0);
1357 xcode = GET_CODE (x);
1359 if (xcode == REG)
1360 fprintf (file, "d");
1361 else if (xcode == CONST_INT)
1362 fprintf (file, "a");
1363 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1364 fprintf (file, "r");
1365 else if (xcode == PLUS || xcode == LO_SUM)
1367 if (GET_CODE (XEXP (x, 1)) == REG)
1368 fprintf (file, "x");
1369 else
1370 fprintf (file, "d");
1372 return;
1374 case 'e':
1375 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1376 val &= 0x7;
1377 output_addr_const (file, GEN_INT (val));
1378 return;
1380 case 'f':
1381 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1382 val &= 0x1f;
1383 output_addr_const (file, GEN_INT (val));
1384 return;
1386 case 'g':
1387 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1388 val &= 0x3f;
1389 output_addr_const (file, GEN_INT (val));
1390 return;
1392 case 'h':
1393 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1394 val = (val >> 3) & 0x1f;
1395 output_addr_const (file, GEN_INT (val));
1396 return;
1398 case 'E':
1399 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1400 val = -val;
1401 val &= 0x7;
1402 output_addr_const (file, GEN_INT (val));
1403 return;
1405 case 'F':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val = -val;
1408 val &= 0x1f;
1409 output_addr_const (file, GEN_INT (val));
1410 return;
1412 case 'G':
1413 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1414 val = -val;
1415 val &= 0x3f;
1416 output_addr_const (file, GEN_INT (val));
1417 return;
1419 case 'H':
1420 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1421 val = -(val & -8ll);
1422 val = (val >> 3) & 0x1f;
1423 output_addr_const (file, GEN_INT (val));
1424 return;
1426 case 'v':
1427 case 'w':
1428 constant_to_array (mode, x, arr);
1429 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1430 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1431 return;
1433 case 0:
1434 if (xcode == REG)
1435 fprintf (file, "%s", reg_names[REGNO (x)]);
1436 else if (xcode == MEM)
1437 output_address (XEXP (x, 0));
1438 else if (xcode == CONST_VECTOR)
1439 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1440 else
1441 output_addr_const (file, x);
1442 return;
1444 /* unused letters
1445 o qr u yz
1446 AB OPQR UVWXYZ */
1447 default:
1448 output_operand_lossage ("invalid %%xn code");
1450 gcc_unreachable ();
1453 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1454 caller saved register. For leaf functions it is more efficient to
1455 use a volatile register because we won't need to save and restore the
1456 pic register. This routine is only valid after register allocation
1457 is completed, so we can pick an unused register. */
1458 static rtx
1459 get_pic_reg (void)
1461 if (!reload_completed && !reload_in_progress)
1462 abort ();
/* If we've already made the decision, we need to stick with it. Once we've
1465 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1466 return true since the register is now live; this should not cause us to
1467 "switch back" to using pic_offset_table_rtx. */
1468 if (!cfun->machine->pic_reg)
1470 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1471 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1472 else
1473 cfun->machine->pic_reg = pic_offset_table_rtx;
1476 return cfun->machine->pic_reg;
1479 /* Split constant addresses to handle cases that are too large.
1480 Add in the pic register when in PIC mode.
1481 Split immediates that require more than 1 instruction. */
1483 spu_split_immediate (rtx * ops)
1485 enum machine_mode mode = GET_MODE (ops[0]);
1486 enum immediate_class c = classify_immediate (ops[1], mode);
1488 switch (c)
1490 case IC_IL2:
1492 unsigned char arrhi[16];
1493 unsigned char arrlo[16];
1494 rtx to, temp, hi, lo;
1495 int i;
1496 enum machine_mode imode = mode;
1497 /* We need to do reals as ints because the constant used in the
1498 IOR might not be a legitimate real constant. */
1499 imode = int_mode_for_mode (mode);
1500 constant_to_array (mode, ops[1], arrhi);
1501 if (imode != mode)
1502 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1503 else
1504 to = ops[0];
1505 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1506 for (i = 0; i < 16; i += 4)
1508 arrlo[i + 2] = arrhi[i + 2];
1509 arrlo[i + 3] = arrhi[i + 3];
1510 arrlo[i + 0] = arrlo[i + 1] = 0;
1511 arrhi[i + 2] = arrhi[i + 3] = 0;
1513 hi = array_to_constant (imode, arrhi);
1514 lo = array_to_constant (imode, arrlo);
1515 emit_move_insn (temp, hi);
1516 emit_insn (gen_rtx_SET
1517 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1518 return 1;
1520 case IC_FSMBI2:
1522 unsigned char arr_fsmbi[16];
1523 unsigned char arr_andbi[16];
1524 rtx to, reg_fsmbi, reg_and;
1525 int i;
1526 enum machine_mode imode = mode;
1527 /* We need to do reals as ints because the constant used in the
1528 * AND might not be a legitimate real constant. */
1529 imode = int_mode_for_mode (mode);
1530 constant_to_array (mode, ops[1], arr_fsmbi);
1531 if (imode != mode)
1532 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1533 else
1534 to = ops[0];
1535 for (i = 0; i < 16; i++)
1536 if (arr_fsmbi[i] != 0)
1538 arr_andbi[0] = arr_fsmbi[i];
1539 arr_fsmbi[i] = 0xff;
1541 for (i = 1; i < 16; i++)
1542 arr_andbi[i] = arr_andbi[0];
1543 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1544 reg_and = array_to_constant (imode, arr_andbi);
1545 emit_move_insn (to, reg_fsmbi);
1546 emit_insn (gen_rtx_SET
1547 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1548 return 1;
1550 case IC_POOL:
1551 if (reload_in_progress || reload_completed)
1553 rtx mem = force_const_mem (mode, ops[1]);
1554 if (TARGET_LARGE_MEM)
1556 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1557 emit_move_insn (addr, XEXP (mem, 0));
1558 mem = replace_equiv_address (mem, addr);
1560 emit_move_insn (ops[0], mem);
1561 return 1;
1563 break;
1564 case IC_IL1s:
1565 case IC_IL2s:
1566 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1568 if (c == IC_IL2s)
1570 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1571 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1573 else if (flag_pic)
1574 emit_insn (gen_pic (ops[0], ops[1]));
1575 if (flag_pic)
1577 rtx pic_reg = get_pic_reg ();
1578 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1580 return flag_pic || c == IC_IL2s;
1582 break;
1583 case IC_IL1:
1584 case IC_FSMBI:
1585 case IC_CPAT:
1586 break;
1588 return 0;
1591 /* SAVING is TRUE when we are generating the actual load and store
1592 instructions for REGNO. When determining the size of the stack
needed for saving registers we must allocate enough space for the
1594 worst case, because we don't always have the information early enough
1595 to not allocate it. But we can at least eliminate the actual loads
1596 and stores during the prologue/epilogue. */
1597 static int
1598 need_to_save_reg (int regno, int saving)
1600 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1601 return 1;
1602 if (flag_pic
1603 && regno == PIC_OFFSET_TABLE_REGNUM
1604 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1605 return 1;
1606 return 0;
1609 /* This function is only correct starting with local register
1610 allocation */
1612 spu_saved_regs_size (void)
1614 int reg_save_size = 0;
1615 int regno;
1617 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1618 if (need_to_save_reg (regno, 0))
1619 reg_save_size += 0x10;
1620 return reg_save_size;
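/* Store register REGNO as a V4SImode quadword to the frame slot at
   ADDR + OFFSET; frame_emit_load below is the matching load.  */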
1623 static rtx
1624 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1626 rtx reg = gen_rtx_REG (V4SImode, regno);
1627 rtx mem =
1628 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1629 return emit_insn (gen_movv4si (mem, reg));
1632 static rtx
1633 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1635 rtx reg = gen_rtx_REG (V4SImode, regno);
1636 rtx mem =
1637 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1638 return emit_insn (gen_movv4si (reg, mem));
1641 /* This happens after reload, so we need to expand it. */
1642 static rtx
1643 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1645 rtx insn;
1646 if (satisfies_constraint_K (GEN_INT (imm)))
1648 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1650 else
1652 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1653 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1654 if (REGNO (src) == REGNO (scratch))
1655 abort ();
1657 return insn;
1660 /* Return nonzero if this function is known to have a null epilogue. */
1663 direct_return (void)
1665 if (reload_completed)
1667 if (cfun->static_chain_decl == 0
1668 && (spu_saved_regs_size ()
1669 + get_frame_size ()
1670 + crtl->outgoing_args_size
1671 + crtl->args.pretend_args_size == 0)
1672 && crtl->is_leaf)
1673 return 1;
1675 return 0;
1679 The stack frame looks like this:
1680 +-------------+
1681 | incoming |
1682 | args |
1683 AP -> +-------------+
1684 | $lr save |
1685 +-------------+
1686 prev SP | back chain |
1687 +-------------+
1688 | var args |
1689 | reg save | crtl->args.pretend_args_size bytes
1690 +-------------+
1691 | ... |
1692 | saved regs | spu_saved_regs_size() bytes
1693 FP -> +-------------+
1694 | ... |
1695 | vars | get_frame_size() bytes
1696 HFP -> +-------------+
1697 | ... |
1698 | outgoing |
1699 | args | crtl->outgoing_args_size bytes
1700 +-------------+
1701 | $lr of next |
1702 | frame |
1703 +-------------+
1704 | back chain |
1705 SP -> +-------------+
1708 void
1709 spu_expand_prologue (void)
1711 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1712 HOST_WIDE_INT total_size;
1713 HOST_WIDE_INT saved_regs_size;
1714 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1715 rtx scratch_reg_0, scratch_reg_1;
1716 rtx insn, real;
1718 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1719 cfun->machine->pic_reg = pic_offset_table_rtx;
1721 if (spu_naked_function_p (current_function_decl))
1722 return;
1724 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1725 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1727 saved_regs_size = spu_saved_regs_size ();
1728 total_size = size + saved_regs_size
1729 + crtl->outgoing_args_size
1730 + crtl->args.pretend_args_size;
1732 if (!crtl->is_leaf
1733 || cfun->calls_alloca || total_size > 0)
1734 total_size += STACK_POINTER_OFFSET;
1736 /* Save this first because code after this might use the link
1737 register as a scratch register. */
1738 if (!crtl->is_leaf)
1740 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1741 RTX_FRAME_RELATED_P (insn) = 1;
1744 if (total_size > 0)
1746 offset = -crtl->args.pretend_args_size;
1747 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1748 if (need_to_save_reg (regno, 1))
1750 offset -= 16;
1751 insn = frame_emit_store (regno, sp_reg, offset);
1752 RTX_FRAME_RELATED_P (insn) = 1;
1756 if (flag_pic && cfun->machine->pic_reg)
1758 rtx pic_reg = cfun->machine->pic_reg;
1759 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1760 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1763 if (total_size > 0)
1765 if (flag_stack_check)
1767 /* We compare against total_size-1 because
1768 ($sp >= total_size) <=> ($sp > total_size-1) */
1769 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1770 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1771 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1772 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1774 emit_move_insn (scratch_v4si, size_v4si);
1775 size_v4si = scratch_v4si;
1777 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1778 emit_insn (gen_vec_extractv4si
1779 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1780 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1783 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1784 the value of the previous $sp because we save it as the back
1785 chain. */
1786 if (total_size <= 2000)
1788 /* In this case we save the back chain first. */
1789 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1790 insn =
1791 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1793 else
1795 insn = emit_move_insn (scratch_reg_0, sp_reg);
1796 insn =
1797 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1799 RTX_FRAME_RELATED_P (insn) = 1;
1800 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1801 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1803 if (total_size > 2000)
1805 /* Save the back chain ptr */
1806 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1809 if (frame_pointer_needed)
1811 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1812 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1813 + crtl->outgoing_args_size;
1814 /* Set the new frame_pointer */
1815 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1816 RTX_FRAME_RELATED_P (insn) = 1;
1817 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1818 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1819 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1823 if (flag_stack_usage_info)
1824 current_function_static_stack_size = total_size;
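/* Emit the function epilogue: pop the frame, reload the saved
   registers and $lr, and emit the return unless SIBCALL_P.  */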
1827 void
1828 spu_expand_epilogue (bool sibcall_p)
1830 int size = get_frame_size (), offset, regno;
1831 HOST_WIDE_INT saved_regs_size, total_size;
1832 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1833 rtx scratch_reg_0;
1835 if (spu_naked_function_p (current_function_decl))
1836 return;
1838 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1840 saved_regs_size = spu_saved_regs_size ();
1841 total_size = size + saved_regs_size
1842 + crtl->outgoing_args_size
1843 + crtl->args.pretend_args_size;
1845 if (!crtl->is_leaf
1846 || cfun->calls_alloca || total_size > 0)
1847 total_size += STACK_POINTER_OFFSET;
1849 if (total_size > 0)
1851 if (cfun->calls_alloca)
1852 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1853 else
1854 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1857 if (saved_regs_size > 0)
1859 offset = -crtl->args.pretend_args_size;
1860 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1861 if (need_to_save_reg (regno, 1))
1863 offset -= 0x10;
1864 frame_emit_load (regno, sp_reg, offset);
1869 if (!crtl->is_leaf)
1870 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1872 if (!sibcall_p)
1874 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1875 emit_jump_insn (gen__return ());
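/* Return the return address of the current frame (COUNT == 0);
   other frames are not supported.  */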
1880 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1882 if (count != 0)
1883 return 0;
1884 /* This is inefficient because it ends up copying to a save-register
1885 which then gets saved even though $lr has already been saved. But
1886 it does generate better code for leaf functions and we don't need
1887 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1888 used for __builtin_return_address anyway, so maybe we don't care if
1889 it's inefficient. */
1890 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1894 /* Given VAL, generate a constant appropriate for MODE.
1895 If MODE is a vector mode, every element will be VAL.
1896 For TImode, VAL will be zero extended to 128 bits. */
1898 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1900 rtx inner;
1901 rtvec v;
1902 int units, i;
1904 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1905 || GET_MODE_CLASS (mode) == MODE_FLOAT
1906 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1907 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1909 if (GET_MODE_CLASS (mode) == MODE_INT)
1910 return immed_double_const (val, 0, mode);
1912 /* val is the bit representation of the float */
1913 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1914 return hwint_to_const_double (mode, val);
1916 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1917 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1918 else
1919 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1921 units = GET_MODE_NUNITS (mode);
1923 v = rtvec_alloc (units);
1925 for (i = 0; i < units; ++i)
1926 RTVEC_ELT (v, i) = inner;
1928 return gen_rtx_CONST_VECTOR (mode, v);
1931 /* Create a MODE vector constant from 4 ints. */
1933 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1935 unsigned char arr[16];
1936 arr[0] = (a >> 24) & 0xff;
1937 arr[1] = (a >> 16) & 0xff;
1938 arr[2] = (a >> 8) & 0xff;
1939 arr[3] = (a >> 0) & 0xff;
1940 arr[4] = (b >> 24) & 0xff;
1941 arr[5] = (b >> 16) & 0xff;
1942 arr[6] = (b >> 8) & 0xff;
1943 arr[7] = (b >> 0) & 0xff;
1944 arr[8] = (c >> 24) & 0xff;
1945 arr[9] = (c >> 16) & 0xff;
1946 arr[10] = (c >> 8) & 0xff;
1947 arr[11] = (c >> 0) & 0xff;
1948 arr[12] = (d >> 24) & 0xff;
1949 arr[13] = (d >> 16) & 0xff;
1950 arr[14] = (d >> 8) & 0xff;
1951 arr[15] = (d >> 0) & 0xff;
1952 return array_to_constant(mode, arr);
/* Branch hint support. */
1957 /* An array of these is used to propagate hints to predecessor blocks. */
1958 struct spu_bb_info
1960 rtx prop_jump; /* propagated from another block */
1961 int bb_index; /* the original block. */
1963 static struct spu_bb_info *spu_bb_info;
1965 #define STOP_HINT_P(INSN) \
1966 (CALL_P(INSN) \
1967 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1968 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1970 /* 1 when RTX is a hinted branch or its target. We keep track of
1971 what has been hinted so the safe-hint code can test it easily. */
1972 #define HINTED_P(RTX) \
1973 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1975 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1976 #define SCHED_ON_EVEN_P(RTX) \
1977 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1979 /* Emit a nop for INSN such that the two will dual issue. This assumes
1980 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1981 We check for TImode to handle a MULTI1 insn which has dual issued its
1982 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1983 static void
1984 emit_nop_for_insn (rtx insn)
1986 int p;
1987 rtx new_insn;
1989 /* We need to handle JUMP_TABLE_DATA separately. */
1990 if (JUMP_TABLE_DATA_P (insn))
1992 new_insn = emit_insn_after (gen_lnop(), insn);
1993 recog_memoized (new_insn);
1994 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1995 return;
1998 p = get_pipe (insn);
1999 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2000 new_insn = emit_insn_after (gen_lnop (), insn);
2001 else if (p == 1 && GET_MODE (insn) == TImode)
2003 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2004 PUT_MODE (new_insn, TImode);
2005 PUT_MODE (insn, VOIDmode);
2007 else
2008 new_insn = emit_insn_after (gen_lnop (), insn);
2009 recog_memoized (new_insn);
2010 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2013 /* Insert nops in basic blocks to meet dual issue alignment
2014 requirements. Also make sure hbrp and hint instructions are at least
2015 one cycle apart, possibly inserting a nop. */
2016 static void
2017 pad_bb(void)
2019 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2020 int length;
2021 int addr;
2023 /* This sets up INSN_ADDRESSES. */
2024 shorten_branches (get_insns ());
2026 /* Keep track of length added by nops. */
2027 length = 0;
2029 prev_insn = 0;
2030 insn = get_insns ();
2031 if (!active_insn_p (insn))
2032 insn = next_active_insn (insn);
2033 for (; insn; insn = next_insn)
2035 next_insn = next_active_insn (insn);
2036 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2037 || INSN_CODE (insn) == CODE_FOR_hbr)
2039 if (hbr_insn)
2041 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2042 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2043 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2044 || (a1 - a0 == 4))
2046 prev_insn = emit_insn_before (gen_lnop (), insn);
2047 PUT_MODE (prev_insn, GET_MODE (insn));
2048 PUT_MODE (insn, TImode);
2049 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2050 length += 4;
2053 hbr_insn = insn;
2055 if (INSN_CODE (insn) == CODE_FOR_blockage)
2057 if (GET_MODE (insn) == TImode)
2058 PUT_MODE (next_insn, TImode);
2059 insn = next_insn;
2060 next_insn = next_active_insn (insn);
2062 addr = INSN_ADDRESSES (INSN_UID (insn));
2063 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2065 if (((addr + length) & 7) != 0)
2067 emit_nop_for_insn (prev_insn);
2068 length += 4;
2071 else if (GET_MODE (insn) == TImode
2072 && ((next_insn && GET_MODE (next_insn) != TImode)
2073 || get_attr_type (insn) == TYPE_MULTI0)
2074 && ((addr + length) & 7) != 0)
2076 /* prev_insn will always be set because the first insn is
2077 always 8-byte aligned. */
2078 emit_nop_for_insn (prev_insn);
2079 length += 4;
2081 prev_insn = insn;
2086 /* Routines for branch hints. */
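/* Emit an hbr before BEFORE hinting that BRANCH, DISTANCE bytes away,
   will jump to TARGET.  BLOCKS records the basic blocks that received
   a hint; blockages and nops are added when the hint would otherwise
   end up too close to or too far from the branch.  */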
2088 static void
2089 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2090 int distance, sbitmap blocks)
2092 rtx branch_label = 0;
2093 rtx hint;
2094 rtx insn;
2095 rtx table;
2097 if (before == 0 || branch == 0 || target == 0)
2098 return;
/* While scheduling we require hints to be no further than 600 bytes
away, so we need to enforce that here too. */
2102 if (distance > 600)
2103 return;
/* If BEFORE is a basic block note, emit the hint after the note. */
2106 if (NOTE_INSN_BASIC_BLOCK_P (before))
2107 before = NEXT_INSN (before);
2109 branch_label = gen_label_rtx ();
2110 LABEL_NUSES (branch_label)++;
2111 LABEL_PRESERVE_P (branch_label) = 1;
2112 insn = emit_label_before (branch_label, branch);
2113 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2114 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2116 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2117 recog_memoized (hint);
2118 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2119 HINTED_P (branch) = 1;
2121 if (GET_CODE (target) == LABEL_REF)
2122 HINTED_P (XEXP (target, 0)) = 1;
2123 else if (tablejump_p (branch, 0, &table))
2125 rtvec vec;
2126 int j;
2127 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2128 vec = XVEC (PATTERN (table), 0);
2129 else
2130 vec = XVEC (PATTERN (table), 1);
2131 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2132 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2135 if (distance >= 588)
/* Make sure the hint isn't scheduled any earlier than this point,
which could make it too far for the branch offset to fit. */
2139 insn = emit_insn_before (gen_blockage (), hint);
2140 recog_memoized (insn);
2141 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2143 else if (distance <= 8 * 4)
2145 /* To guarantee at least 8 insns between the hint and branch we
2146 insert nops. */
2147 int d;
2148 for (d = distance; d < 8 * 4; d += 4)
2150 insn =
2151 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2152 recog_memoized (insn);
2153 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2156 /* Make sure any nops inserted aren't scheduled before the hint. */
2157 insn = emit_insn_after (gen_blockage (), hint);
2158 recog_memoized (insn);
2159 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2161 /* Make sure any nops inserted aren't scheduled after the call. */
2162 if (CALL_P (branch) && distance < 8 * 4)
2164 insn = emit_insn_before (gen_blockage (), branch);
2165 recog_memoized (insn);
2166 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2171 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2172 the rtx for the branch target. */
2173 static rtx
2174 get_branch_target (rtx branch)
2176 if (JUMP_P (branch))
2178 rtx set, src;
2180 /* Return statements */
2181 if (GET_CODE (PATTERN (branch)) == RETURN)
2182 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2184 /* ASM GOTOs. */
2185 if (extract_asm_operands (PATTERN (branch)) != NULL)
2186 return NULL;
2188 set = single_set (branch);
2189 src = SET_SRC (set);
2190 if (GET_CODE (SET_DEST (set)) != PC)
2191 abort ();
2193 if (GET_CODE (src) == IF_THEN_ELSE)
2195 rtx lab = 0;
2196 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2197 if (note)
2199 /* If the more probable case is not a fall through, then
2200 try a branch hint. */
2201 int prob = XINT (note, 0);
2202 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2203 && GET_CODE (XEXP (src, 1)) != PC)
2204 lab = XEXP (src, 1);
2205 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2206 && GET_CODE (XEXP (src, 2)) != PC)
2207 lab = XEXP (src, 2);
2209 if (lab)
2211 if (GET_CODE (lab) == RETURN)
2212 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2213 return lab;
2215 return 0;
2218 return src;
2220 else if (CALL_P (branch))
2222 rtx call;
2223 /* All of our call patterns are in a PARALLEL and the CALL is
2224 the first pattern in the PARALLEL. */
2225 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2226 abort ();
2227 call = XVECEXP (PATTERN (branch), 0, 0);
2228 if (GET_CODE (call) == SET)
2229 call = SET_SRC (call);
2230 if (GET_CODE (call) != CALL)
2231 abort ();
2232 return XEXP (XEXP (call, 0), 0);
2234 return 0;
2237 /* The special $hbr register is used to prevent the insn scheduler from
2238 moving hbr insns across instructions which invalidate them. It
2239 should only be used in a clobber, and this function searches for
2240 insns which clobber it. */
2241 static bool
2242 insn_clobbers_hbr (rtx insn)
2244 if (INSN_P (insn)
2245 && GET_CODE (PATTERN (insn)) == PARALLEL)
2247 rtx parallel = PATTERN (insn);
2248 rtx clobber;
2249 int j;
2250 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2252 clobber = XVECEXP (parallel, 0, j);
2253 if (GET_CODE (clobber) == CLOBBER
2254 && GET_CODE (XEXP (clobber, 0)) == REG
2255 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2256 return 1;
2259 return 0;
2262 /* Search up to 32 insns starting at FIRST:
2263 - at any kind of hinted branch, just return
2264 - at any unconditional branch in the first 15 insns, just return
2265 - at a call or indirect branch, after the first 15 insns, force it to
2266 an even address and return
2267 - at any unconditional branch, after the first 15 insns, force it to
2268 an even address.
2269 At the end of the search, insert an hbrp within 4 insns of FIRST,
2270 and an hbrp within 16 instructions of FIRST.
2272 static void
2273 insert_hbrp_for_ilb_runout (rtx first)
2275 rtx insn, before_4 = 0, before_16 = 0;
2276 int addr = 0, length, first_addr = -1;
2277 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2278 int insert_lnop_after = 0;
2279 for (insn = first; insn; insn = NEXT_INSN (insn))
2280 if (INSN_P (insn))
2282 if (first_addr == -1)
2283 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2284 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2285 length = get_attr_length (insn);
2287 if (before_4 == 0 && addr + length >= 4 * 4)
2288 before_4 = insn;
2289 /* We test for 14 instructions because the first hbrp will add
2290 up to 2 instructions. */
2291 if (before_16 == 0 && addr + length >= 14 * 4)
2292 before_16 = insn;
2294 if (INSN_CODE (insn) == CODE_FOR_hbr)
2296 /* Make sure an hbrp is at least 2 cycles away from a hint.
2297 Insert an lnop after the hbrp when necessary. */
2298 if (before_4 == 0 && addr > 0)
2300 before_4 = insn;
2301 insert_lnop_after |= 1;
2303 else if (before_4 && addr <= 4 * 4)
2304 insert_lnop_after |= 1;
2305 if (before_16 == 0 && addr > 10 * 4)
2307 before_16 = insn;
2308 insert_lnop_after |= 2;
2310 else if (before_16 && addr <= 14 * 4)
2311 insert_lnop_after |= 2;
2314 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2316 if (addr < hbrp_addr0)
2317 hbrp_addr0 = addr;
2318 else if (addr < hbrp_addr1)
2319 hbrp_addr1 = addr;
2322 if (CALL_P (insn) || JUMP_P (insn))
2324 if (HINTED_P (insn))
2325 return;
2327 /* Any branch after the first 15 insns should be on an even
2328 address to avoid a special case branch. There might be
2329 some nops and/or hbrps inserted, so we test after 10
2330 insns. */
2331 if (addr > 10 * 4)
2332 SCHED_ON_EVEN_P (insn) = 1;
2335 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2336 return;
2339 if (addr + length >= 32 * 4)
2341 gcc_assert (before_4 && before_16);
2342 if (hbrp_addr0 > 4 * 4)
2344 insn =
2345 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2346 recog_memoized (insn);
2347 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2348 INSN_ADDRESSES_NEW (insn,
2349 INSN_ADDRESSES (INSN_UID (before_4)));
2350 PUT_MODE (insn, GET_MODE (before_4));
2351 PUT_MODE (before_4, TImode);
2352 if (insert_lnop_after & 1)
2354 insn = emit_insn_before (gen_lnop (), before_4);
2355 recog_memoized (insn);
2356 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2357 INSN_ADDRESSES_NEW (insn,
2358 INSN_ADDRESSES (INSN_UID (before_4)));
2359 PUT_MODE (insn, TImode);
2362 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2363 && hbrp_addr1 > 16 * 4)
2365 insn =
2366 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2367 recog_memoized (insn);
2368 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2369 INSN_ADDRESSES_NEW (insn,
2370 INSN_ADDRESSES (INSN_UID (before_16)));
2371 PUT_MODE (insn, GET_MODE (before_16));
2372 PUT_MODE (before_16, TImode);
2373 if (insert_lnop_after & 2)
2375 insn = emit_insn_before (gen_lnop (), before_16);
2376 recog_memoized (insn);
2377 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2378 INSN_ADDRESSES_NEW (insn,
2379 INSN_ADDRESSES (INSN_UID
2380 (before_16)));
2381 PUT_MODE (insn, TImode);
2384 return;
2387 else if (BARRIER_P (insn))
2388 return;
2392 /* The SPU might hang when it executes 48 inline instructions after a
2393 hinted branch jumps to its hinted target. The beginning of a
2394 function and the return from a call might have been hinted, and
2395 must be handled as well. To prevent a hang we insert 2 hbrps. The
2396 first should be within 6 insns of the branch target. The second
2397 should be within 22 insns of the branch target. When determining
2398 if hbrps are necessary, we look for only 32 inline instructions,
2399 because up to 12 nops and 4 hbrps could be inserted.  Similarly,
2400 when inserting new hbrps, we insert them within 4 and 16 insns of
2401 the target. */
2402 static void
2403 insert_hbrp (void)
2405 rtx insn;
2406 if (TARGET_SAFE_HINTS)
2408 shorten_branches (get_insns ());
2409 /* Insert hbrp at beginning of function */
2410 insn = next_active_insn (get_insns ());
2411 if (insn)
2412 insert_hbrp_for_ilb_runout (insn);
2413 /* Insert hbrp after hinted targets. */
2414 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2415 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2416 insert_hbrp_for_ilb_runout (next_active_insn (insn));
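/* Nonzero while spu_machine_dependent_reorg is running; the scheduler
   hooks below use it to enable the load/store and hbrp heuristics that
   only apply during the final scheduling pass.  */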
2420 static int in_spu_reorg;
2422 static void
2423 spu_var_tracking (void)
2425 if (flag_var_tracking)
2427 df_analyze ();
2428 timevar_push (TV_VAR_TRACKING);
2429 variable_tracking_main ();
2430 timevar_pop (TV_VAR_TRACKING);
2431 df_finish_pass (false);
2435 /* Insert branch hints. There are no branch optimizations after this
2436 pass, so it's safe to set our branch hints now. */
2437 static void
2438 spu_machine_dependent_reorg (void)
2440 sbitmap blocks;
2441 basic_block bb;
2442 rtx branch, insn;
2443 rtx branch_target = 0;
2444 int branch_addr = 0, insn_addr, required_dist = 0;
2445 int i;
2446 unsigned int j;
2448 if (!TARGET_BRANCH_HINTS || optimize == 0)
2450 /* We still do it for unoptimized code because an external
2451 function might have hinted a call or return. */
2452 compute_bb_for_insn ();
2453 insert_hbrp ();
2454 pad_bb ();
2455 spu_var_tracking ();
2456 free_bb_for_insn ();
2457 return;
2460 blocks = sbitmap_alloc (last_basic_block);
2461 bitmap_clear (blocks);
2463 in_spu_reorg = 1;
2464 compute_bb_for_insn ();
2466 /* (Re-)discover loops so that bb->loop_father can be used
2467 in the analysis below. */
2468 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2470 compact_blocks ();
2472 spu_bb_info =
2473 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2474 sizeof (struct spu_bb_info));
2476 /* We need exact insn addresses and lengths. */
2477 shorten_branches (get_insns ());
2479 for (i = n_basic_blocks - 1; i >= 0; i--)
2481 bb = BASIC_BLOCK (i);
2482 branch = 0;
2483 if (spu_bb_info[i].prop_jump)
2485 branch = spu_bb_info[i].prop_jump;
2486 branch_target = get_branch_target (branch);
2487 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2488 required_dist = spu_hint_dist;
2490 /* Search from end of a block to beginning. In this loop, find
2491 jumps which need a branch hint and emit the hint only when:
2492 - it's an indirect branch and we're at the insn which sets
2493 the register
2494 - we're at an insn that will invalidate the hint. e.g., a
2495 call, another hint insn, inline asm that clobbers $hbr, and
2496 some inlined operations (divmodsi4). Don't consider jumps
2497 because they are only at the end of a block and are
2498 considered when we are deciding whether to propagate
2499 - we're getting too far away from the branch. The hbr insns
2500 only have a signed 10 bit offset
2501 We go back as far as possible so the branch will be considered
2502 for propagation when we get to the beginning of the block. */
2503 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2505 if (INSN_P (insn))
2507 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2508 if (branch
2509 && ((GET_CODE (branch_target) == REG
2510 && set_of (branch_target, insn) != NULL_RTX)
2511 || insn_clobbers_hbr (insn)
2512 || branch_addr - insn_addr > 600))
2514 rtx next = NEXT_INSN (insn);
2515 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2516 if (insn != BB_END (bb)
2517 && branch_addr - next_addr >= required_dist)
2519 if (dump_file)
2520 fprintf (dump_file,
2521 "hint for %i in block %i before %i\n",
2522 INSN_UID (branch), bb->index,
2523 INSN_UID (next));
2524 spu_emit_branch_hint (next, branch, branch_target,
2525 branch_addr - next_addr, blocks);
2527 branch = 0;
2530 /* JUMP_P will only be true at the end of a block. When
2531 branch is already set it means we've previously decided
2532 to propagate a hint for that branch into this block. */
2533 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2535 branch = 0;
2536 if ((branch_target = get_branch_target (insn)))
2538 branch = insn;
2539 branch_addr = insn_addr;
2540 required_dist = spu_hint_dist;
2544 if (insn == BB_HEAD (bb))
2545 break;
2548 if (branch)
2550 /* If we haven't emitted a hint for this branch yet, it might
2551 be profitable to emit it in one of the predecessor blocks,
2552 especially for loops. */
2553 rtx bbend;
2554 basic_block prev = 0, prop = 0, prev2 = 0;
2555 int loop_exit = 0, simple_loop = 0;
2556 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2558 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2559 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2560 prev = EDGE_PRED (bb, j)->src;
2561 else
2562 prev2 = EDGE_PRED (bb, j)->src;
2564 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2565 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2566 loop_exit = 1;
2567 else if (EDGE_SUCC (bb, j)->dest == bb)
2568 simple_loop = 1;
2570 /* If this branch is a loop exit then propagate to previous
2571 fallthru block. This catches the cases when it is a simple
2572 loop or when there is an initial branch into the loop. */
2573 if (prev && (loop_exit || simple_loop)
2574 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2575 prop = prev;
2577 /* If there is only one adjacent predecessor, don't propagate
2578 outside this loop. */
2579 else if (prev && single_pred_p (bb)
2580 && prev->loop_father == bb->loop_father)
2581 prop = prev;
2583 /* If this is the JOIN block of a simple IF-THEN then
2584 propagate the hint to the HEADER block. */
2585 else if (prev && prev2
2586 && EDGE_COUNT (bb->preds) == 2
2587 && EDGE_COUNT (prev->preds) == 1
2588 && EDGE_PRED (prev, 0)->src == prev2
2589 && prev2->loop_father == bb->loop_father
2590 && GET_CODE (branch_target) != REG)
2591 prop = prev;
2593 /* Don't propagate when:
2594 - this is a simple loop and the hint would be too far
2595 - this is not a simple loop and there are 16 insns in
2596 this block already
2597 - the predecessor block ends in a branch that will be
2598 hinted
2599 - the predecessor block ends in an insn that invalidates
2600 the hint */
2601 if (prop
2602 && prop->index >= 0
2603 && (bbend = BB_END (prop))
2604 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2605 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2606 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2608 if (dump_file)
2609 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2610 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2611 bb->index, prop->index, bb_loop_depth (bb),
2612 INSN_UID (branch), loop_exit, simple_loop,
2613 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2615 spu_bb_info[prop->index].prop_jump = branch;
2616 spu_bb_info[prop->index].bb_index = i;
2618 else if (branch_addr - next_addr >= required_dist)
2620 if (dump_file)
2621 fprintf (dump_file, "hint for %i in block %i before %i\n",
2622 INSN_UID (branch), bb->index,
2623 INSN_UID (NEXT_INSN (insn)));
2624 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2625 branch_addr - next_addr, blocks);
2627 branch = 0;
2630 free (spu_bb_info);
2632 if (!bitmap_empty_p (blocks))
2633 find_many_sub_basic_blocks (blocks);
2635 /* We have to schedule to make sure alignment is ok. */
2636 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2638 /* The hints need to be scheduled, so call it again. */
2639 schedule_insns ();
2640 df_finish_pass (true);
2642 insert_hbrp ();
2644 pad_bb ();
2646 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2647 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2649 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2650 between its branch label and the branch.  We don't move the
2651 label because GCC expects it at the beginning of the block. */
2652 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2653 rtx label_ref = XVECEXP (unspec, 0, 0);
2654 rtx label = XEXP (label_ref, 0);
2655 rtx branch;
2656 int offset = 0;
2657 for (branch = NEXT_INSN (label);
2658 !JUMP_P (branch) && !CALL_P (branch);
2659 branch = NEXT_INSN (branch))
2660 if (NONJUMP_INSN_P (branch))
2661 offset += get_attr_length (branch);
2662 if (offset > 0)
2663 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2666 spu_var_tracking ();
2668 loop_optimizer_finalize ();
2670 free_bb_for_insn ();
2672 in_spu_reorg = 0;
2676 /* Insn scheduling routines, primarily for dual issue. */
2677 static int
2678 spu_sched_issue_rate (void)
2680 return 2;
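/* Return 1 when INSN uses the load/store unit, i.e. its single_set
   reads from or writes to memory.  */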
2683 static int
2684 uses_ls_unit(rtx insn)
2686 rtx set = single_set (insn);
2687 if (set != 0
2688 && (GET_CODE (SET_DEST (set)) == MEM
2689 || GET_CODE (SET_SRC (set)) == MEM))
2690 return 1;
2691 return 0;
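/* Return the pipe INSN issues on: 0 for pipe0 insns, 1 for pipe1 insns
   (loads, stores, shuffles, branches, hints and iprefetch).  Inline asm
   and multi0 insns return -1, and converts return -2.  */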
2694 static int
2695 get_pipe (rtx insn)
2697 enum attr_type t;
2698 /* Handle inline asm */
2699 if (INSN_CODE (insn) == -1)
2700 return -1;
2701 t = get_attr_type (insn);
2702 switch (t)
2704 case TYPE_CONVERT:
2705 return -2;
2706 case TYPE_MULTI0:
2707 return -1;
2709 case TYPE_FX2:
2710 case TYPE_FX3:
2711 case TYPE_SPR:
2712 case TYPE_NOP:
2713 case TYPE_FXB:
2714 case TYPE_FPD:
2715 case TYPE_FP6:
2716 case TYPE_FP7:
2717 return 0;
2719 case TYPE_LNOP:
2720 case TYPE_SHUF:
2721 case TYPE_LOAD:
2722 case TYPE_STORE:
2723 case TYPE_BR:
2724 case TYPE_MULTI1:
2725 case TYPE_HBR:
2726 case TYPE_IPREFETCH:
2727 return 1;
2728 default:
2729 abort ();
2734 /* haifa-sched.c has a static variable that keeps track of the current
2735 cycle. It is passed to spu_sched_reorder, and we record it here for
2736 use by spu_sched_variable_issue. It won't be accurate if the
2737 scheduler updates its clock_var between the two calls.  */
2738 static int clock_var;
2740 /* This is used to keep track of insn alignment. Set to 0 at the
2741 beginning of each block and increased by the "length" attr of each
2742 insn scheduled. */
2743 static int spu_sched_length;
2745 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2746 ready list appropriately in spu_sched_reorder(). */
2747 static int pipe0_clock;
2748 static int pipe1_clock;
2750 static int prev_clock_var;
2752 static int prev_priority;
2754 /* The SPU needs to load the next ilb (instruction line buffer) sometime during the execution of
2755 the previous ilb. There is a potential conflict if every cycle has a
2756 load or store. To avoid the conflict we make sure the load/store
2757 unit is free for at least one cycle during the execution of insns in
2758 the previous ilb. */
2759 static int spu_ls_first;
2760 static int prev_ls_clock;
2762 static void
2763 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2764 int max_ready ATTRIBUTE_UNUSED)
2766 spu_sched_length = 0;
2769 static void
2770 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2771 int max_ready ATTRIBUTE_UNUSED)
2773 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2775 /* When any block might be at least 8-byte aligned, assume they
2776 will all be at least 8-byte aligned to make sure dual issue
2777 works out correctly. */
2778 spu_sched_length = 0;
2780 spu_ls_first = INT_MAX;
2781 clock_var = -1;
2782 prev_ls_clock = -1;
2783 pipe0_clock = -1;
2784 pipe1_clock = -1;
2785 prev_clock_var = -1;
2786 prev_priority = -1;
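/* Scheduler variable_issue hook: record which pipe INSN issued on,
   track load/store-unit use and the running length used for alignment,
   and (except for inline asm) return 1 so that spu_sched_reorder
   decides when to advance the cycle.  */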
2789 static int
2790 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2791 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2793 int len;
2794 int p;
2795 if (GET_CODE (PATTERN (insn)) == USE
2796 || GET_CODE (PATTERN (insn)) == CLOBBER
2797 || (len = get_attr_length (insn)) == 0)
2798 return more;
2800 spu_sched_length += len;
2802 /* Reset on inline asm */
2803 if (INSN_CODE (insn) == -1)
2805 spu_ls_first = INT_MAX;
2806 pipe0_clock = -1;
2807 pipe1_clock = -1;
2808 return 0;
2810 p = get_pipe (insn);
2811 if (p == 0)
2812 pipe0_clock = clock_var;
2813 else
2814 pipe1_clock = clock_var;
2816 if (in_spu_reorg)
2818 if (clock_var - prev_ls_clock > 1
2819 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2820 spu_ls_first = INT_MAX;
2821 if (uses_ls_unit (insn))
2823 if (spu_ls_first == INT_MAX)
2824 spu_ls_first = spu_sched_length;
2825 prev_ls_clock = clock_var;
2828 /* The scheduler hasn't inserted the nop, but we will later on.
2829 Include those nops in spu_sched_length. */
2830 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2831 spu_sched_length += 4;
2832 prev_clock_var = clock_var;
2834 /* more is -1 when called from spu_sched_reorder for new insns
2835 that don't have INSN_PRIORITY */
2836 if (more >= 0)
2837 prev_priority = INSN_PRIORITY (insn);
2840 /* Always try issuing more insns. spu_sched_reorder will decide
2841 when the cycle should be advanced. */
2842 return 1;
2845 /* This function is called for both TARGET_SCHED_REORDER and
2846 TARGET_SCHED_REORDER2. */
2847 static int
2848 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2849 rtx *ready, int *nreadyp, int clock)
2851 int i, nready = *nreadyp;
2852 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2853 rtx insn;
2855 clock_var = clock;
2857 if (nready <= 0 || pipe1_clock >= clock)
2858 return 0;
2860 /* Find any rtl insns that don't generate assembly insns and schedule
2861 them first. */
2862 for (i = nready - 1; i >= 0; i--)
2864 insn = ready[i];
2865 if (INSN_CODE (insn) == -1
2866 || INSN_CODE (insn) == CODE_FOR_blockage
2867 || (INSN_P (insn) && get_attr_length (insn) == 0))
2869 ready[i] = ready[nready - 1];
2870 ready[nready - 1] = insn;
2871 return 1;
2875 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2876 for (i = 0; i < nready; i++)
2877 if (INSN_CODE (ready[i]) != -1)
2879 insn = ready[i];
2880 switch (get_attr_type (insn))
2882 default:
2883 case TYPE_MULTI0:
2884 case TYPE_CONVERT:
2885 case TYPE_FX2:
2886 case TYPE_FX3:
2887 case TYPE_SPR:
2888 case TYPE_NOP:
2889 case TYPE_FXB:
2890 case TYPE_FPD:
2891 case TYPE_FP6:
2892 case TYPE_FP7:
2893 pipe_0 = i;
2894 break;
2895 case TYPE_LOAD:
2896 case TYPE_STORE:
2897 pipe_ls = i;
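/* Fall through: loads and stores also issue on pipe1.  */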
2898 case TYPE_LNOP:
2899 case TYPE_SHUF:
2900 case TYPE_BR:
2901 case TYPE_MULTI1:
2902 case TYPE_HBR:
2903 pipe_1 = i;
2904 break;
2905 case TYPE_IPREFETCH:
2906 pipe_hbrp = i;
2907 break;
2911 /* In the first scheduling phase, schedule loads and stores together
2912 to increase the chance they will get merged during postreload CSE. */
2913 if (!reload_completed && pipe_ls >= 0)
2915 insn = ready[pipe_ls];
2916 ready[pipe_ls] = ready[nready - 1];
2917 ready[nready - 1] = insn;
2918 return 1;
2921 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2922 if (pipe_hbrp >= 0)
2923 pipe_1 = pipe_hbrp;
2925 /* When we have loads/stores in every cycle of the last 15 insns and
2926 we are about to schedule another load/store, emit an hbrp insn
2927 instead. */
2928 if (in_spu_reorg
2929 && spu_sched_length - spu_ls_first >= 4 * 15
2930 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2932 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2933 recog_memoized (insn);
2934 if (pipe0_clock < clock)
2935 PUT_MODE (insn, TImode);
2936 spu_sched_variable_issue (file, verbose, insn, -1);
2937 return 0;
2940 /* In general, we want to emit nops to increase dual issue, but dual
2941 issue isn't faster when one of the insns could be scheduled later
2942 without affecting the critical path.  We look at INSN_PRIORITY to
2943 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2944 used to tune the behavior.  */
2945 if (in_spu_reorg && spu_dual_nops < 10)
2947 /* When we are at an even address and we are not issuing nops to
2948 improve scheduling then we need to advance the cycle. */
2949 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2950 && (spu_dual_nops == 0
2951 || (pipe_1 != -1
2952 && prev_priority >
2953 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2954 return 0;
2956 /* When at an odd address, schedule the highest priority insn
2957 without considering pipeline. */
2958 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2959 && (spu_dual_nops == 0
2960 || (prev_priority >
2961 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2962 return 1;
2966 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2967 pipe0 insn in the ready list, schedule it. */
2968 if (pipe0_clock < clock && pipe_0 >= 0)
2969 schedule_i = pipe_0;
2971 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2972 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2973 else
2974 schedule_i = pipe_1;
2976 if (schedule_i > -1)
2978 insn = ready[schedule_i];
2979 ready[schedule_i] = ready[nready - 1];
2980 ready[nready - 1] = insn;
2981 return 1;
2983 return 0;
2986 /* INSN is dependent on DEP_INSN. */
2987 static int
2988 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
2990 rtx set;
2992 /* The blockage pattern is used to prevent instructions from being
2993 moved across it and has no cost. */
2994 if (INSN_CODE (insn) == CODE_FOR_blockage
2995 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2996 return 0;
2998 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2999 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3000 return 0;
3002 /* Make sure hbrps are spread out. */
3003 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3004 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3005 return 8;
3007 /* Make sure hints and hbrps are 2 cycles apart. */
3008 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3009 || INSN_CODE (insn) == CODE_FOR_hbr)
3010 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3011 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3012 return 2;
3014 /* An hbrp has no real dependency on other insns. */
3015 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3016 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3017 return 0;
3019 /* Assuming that it is unlikely an argument register will be used in
3020 the first cycle of the called function, we reduce the cost for
3021 slightly better scheduling of dep_insn. When not hinted, the
3022 mispredicted branch would hide the cost as well. */
3023 if (CALL_P (insn))
3025 rtx target = get_branch_target (insn);
3026 if (GET_CODE (target) != REG || !set_of (target, insn))
3027 return cost - 2;
3028 return cost;
3031 /* And when returning from a function, let's assume the return values
3032 are completed sooner too. */
3033 if (CALL_P (dep_insn))
3034 return cost - 2;
3036 /* Make sure an instruction that loads from the back chain is scheduled
3037 away from the return instruction so a hint is more likely to get
3038 issued. */
3039 if (INSN_CODE (insn) == CODE_FOR__return
3040 && (set = single_set (dep_insn))
3041 && GET_CODE (SET_DEST (set)) == REG
3042 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3043 return 20;
3045 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3046 scheduler makes every insn in a block anti-dependent on the final
3047 jump_insn. We adjust here so higher cost insns will get scheduled
3048 earlier. */
3049 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3050 return insn_cost (dep_insn) - 3;
3052 return cost;
3055 /* Create a CONST_DOUBLE from a string. */
3057 spu_float_const (const char *string, enum machine_mode mode)
3059 REAL_VALUE_TYPE value;
3060 value = REAL_VALUE_ATOF (string, mode);
3061 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
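/* Return nonzero when X is a constant that can appear as an address:
   a label, symbol, integer constant, CONST expression or HIGH.  */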
3065 spu_constant_address_p (rtx x)
3067 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3068 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3069 || GET_CODE (x) == HIGH);
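/* Return which immediate-load instruction can load the SImode constant
   VAL in a single instruction: SPU_IL for a 16-bit signed immediate,
   SPU_ILA for an 18-bit unsigned immediate, SPU_ILH when both halfwords
   of VAL are identical, SPU_ILHU when the low halfword is zero (e.g.
   0x12341234 -> SPU_ILH, 0x56780000 -> SPU_ILHU), and SPU_NONE
   otherwise.  */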
3072 static enum spu_immediate
3073 which_immediate_load (HOST_WIDE_INT val)
3075 gcc_assert (val == trunc_int_for_mode (val, SImode));
3077 if (val >= -0x8000 && val <= 0x7fff)
3078 return SPU_IL;
3079 if (val >= 0 && val <= 0x3ffff)
3080 return SPU_ILA;
3081 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3082 return SPU_ILH;
3083 if ((val & 0xffff) == 0)
3084 return SPU_ILHU;
3086 return SPU_NONE;
3089 /* Return true when OP can be loaded by one of the il instructions, or
3090 when the epilogue is not yet generated (!epilogue_completed) and OP can be loaded using ilhu and iohl.  */
3092 immediate_load_p (rtx op, enum machine_mode mode)
3094 if (CONSTANT_P (op))
3096 enum immediate_class c = classify_immediate (op, mode);
3097 return c == IC_IL1 || c == IC_IL1s
3098 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3100 return 0;
3103 /* Return true if the first SIZE bytes of ARR form a constant that can be
3104 generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
3105 are used to return the run length and start offset to use for the
3105 instruction.  */
3106 static int
3107 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3109 int cpat, run, i, start;
3110 cpat = 1;
3111 run = 0;
3112 start = -1;
3113 for (i = 0; i < size && cpat; i++)
3114 if (arr[i] != i+16)
3116 if (!run)
3118 start = i;
3119 if (arr[i] == 3)
3120 run = 1;
3121 else if (arr[i] == 2 && arr[i+1] == 3)
3122 run = 2;
3123 else if (arr[i] == 0)
3125 while (arr[i+run] == run && i+run < 16)
3126 run++;
3127 if (run != 4 && run != 8)
3128 cpat = 0;
3130 else
3131 cpat = 0;
3132 if ((i & (run-1)) != 0)
3133 cpat = 0;
3134 i += run;
3136 else
3137 cpat = 0;
3139 if (cpat && (run || size < 16))
3141 if (run == 0)
3142 run = 1;
3143 if (prun)
3144 *prun = run;
3145 if (pstart)
3146 *pstart = start == -1 ? 16-run : start;
3147 return 1;
3149 return 0;
3152 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3153 it into a register. MODE is only valid when OP is a CONST_INT. */
3154 static enum immediate_class
3155 classify_immediate (rtx op, enum machine_mode mode)
3157 HOST_WIDE_INT val;
3158 unsigned char arr[16];
3159 int i, j, repeated, fsmbi, repeat;
3161 gcc_assert (CONSTANT_P (op));
3163 if (GET_MODE (op) != VOIDmode)
3164 mode = GET_MODE (op);
3166 /* A V4SI const_vector with all identical symbols is ok. */
3167 if (!flag_pic
3168 && mode == V4SImode
3169 && GET_CODE (op) == CONST_VECTOR
3170 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3171 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3172 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3173 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3174 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3175 op = CONST_VECTOR_ELT (op, 0);
3177 switch (GET_CODE (op))
3179 case SYMBOL_REF:
3180 case LABEL_REF:
3181 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3183 case CONST:
3184 /* We can never know if the resulting address fits in 18 bits and can be
3185 loaded with ila. For now, assume the address will not overflow if
3186 the displacement is "small" (fits 'K' constraint). */
3187 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3189 rtx sym = XEXP (XEXP (op, 0), 0);
3190 rtx cst = XEXP (XEXP (op, 0), 1);
3192 if (GET_CODE (sym) == SYMBOL_REF
3193 && GET_CODE (cst) == CONST_INT
3194 && satisfies_constraint_K (cst))
3195 return IC_IL1s;
3197 return IC_IL2s;
3199 case HIGH:
3200 return IC_IL1s;
3202 case CONST_VECTOR:
3203 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3204 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3205 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3206 return IC_POOL;
3207 /* Fall through. */
3209 case CONST_INT:
3210 case CONST_DOUBLE:
3211 constant_to_array (mode, op, arr);
3213 /* Check that each 4-byte slot is identical. */
3214 repeated = 1;
3215 for (i = 4; i < 16; i += 4)
3216 for (j = 0; j < 4; j++)
3217 if (arr[j] != arr[i + j])
3218 repeated = 0;
3220 if (repeated)
3222 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3223 val = trunc_int_for_mode (val, SImode);
3225 if (which_immediate_load (val) != SPU_NONE)
3226 return IC_IL1;
3229 /* Any mode of 2 bytes or smaller can be loaded with an il
3230 instruction. */
3231 gcc_assert (GET_MODE_SIZE (mode) > 2);
3233 fsmbi = 1;
3234 repeat = 0;
3235 for (i = 0; i < 16 && fsmbi; i++)
3236 if (arr[i] != 0 && repeat == 0)
3237 repeat = arr[i];
3238 else if (arr[i] != 0 && arr[i] != repeat)
3239 fsmbi = 0;
3240 if (fsmbi)
3241 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3243 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3244 return IC_CPAT;
3246 if (repeated)
3247 return IC_IL2;
3249 return IC_POOL;
3250 default:
3251 break;
3253 gcc_unreachable ();
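/* Classify the SImode constant VAL for the logical-immediate
   instructions: SPU_ORI when it fits a 10-bit signed word immediate,
   SPU_IOHL when it fits a 16-bit unsigned immediate, SPU_ORHI/SPU_ORBI
   when VAL is a repeated halfword/byte that fits a 10-bit immediate,
   and SPU_NONE otherwise.  */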
3256 static enum spu_immediate
3257 which_logical_immediate (HOST_WIDE_INT val)
3259 gcc_assert (val == trunc_int_for_mode (val, SImode));
3261 if (val >= -0x200 && val <= 0x1ff)
3262 return SPU_ORI;
3263 if (val >= 0 && val <= 0xffff)
3264 return SPU_IOHL;
3265 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3267 val = trunc_int_for_mode (val, HImode);
3268 if (val >= -0x200 && val <= 0x1ff)
3269 return SPU_ORHI;
3270 if ((val & 0xff) == ((val >> 8) & 0xff))
3272 val = trunc_int_for_mode (val, QImode);
3273 if (val >= -0x200 && val <= 0x1ff)
3274 return SPU_ORBI;
3277 return SPU_NONE;
3280 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3281 CONST_DOUBLEs. */
3282 static int
3283 const_vector_immediate_p (rtx x)
3285 int i;
3286 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3287 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3288 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3289 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3290 return 0;
3291 return 1;
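/* Return 1 when OP is a constant (with all 4-byte slots equal) whose
   32-bit pattern is a valid ori, orhi or orbi immediate; iohl is
   excluded and handled by iohl_immediate_p below.  */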
3295 logical_immediate_p (rtx op, enum machine_mode mode)
3297 HOST_WIDE_INT val;
3298 unsigned char arr[16];
3299 int i, j;
3301 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3302 || GET_CODE (op) == CONST_VECTOR);
3304 if (GET_CODE (op) == CONST_VECTOR
3305 && !const_vector_immediate_p (op))
3306 return 0;
3308 if (GET_MODE (op) != VOIDmode)
3309 mode = GET_MODE (op);
3311 constant_to_array (mode, op, arr);
3313 /* Check that bytes are repeated. */
3314 for (i = 4; i < 16; i += 4)
3315 for (j = 0; j < 4; j++)
3316 if (arr[j] != arr[i + j])
3317 return 0;
3319 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3320 val = trunc_int_for_mode (val, SImode);
3322 i = which_logical_immediate (val);
3323 return i != SPU_NONE && i != SPU_IOHL;
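/* Return 1 when OP is a constant whose repeated 32-bit pattern fits the
   16-bit unsigned immediate field of iohl.  */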
3327 iohl_immediate_p (rtx op, enum machine_mode mode)
3329 HOST_WIDE_INT val;
3330 unsigned char arr[16];
3331 int i, j;
3333 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3334 || GET_CODE (op) == CONST_VECTOR);
3336 if (GET_CODE (op) == CONST_VECTOR
3337 && !const_vector_immediate_p (op))
3338 return 0;
3340 if (GET_MODE (op) != VOIDmode)
3341 mode = GET_MODE (op);
3343 constant_to_array (mode, op, arr);
3345 /* Check that bytes are repeated. */
3346 for (i = 4; i < 16; i += 4)
3347 for (j = 0; j < 4; j++)
3348 if (arr[j] != arr[i + j])
3349 return 0;
3351 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3352 val = trunc_int_for_mode (val, SImode);
3354 return val >= 0 && val <= 0xffff;
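/* Return 1 when OP is a scalar or vector constant with all elements
   equal and the element value, truncated to its mode, lies in
   [LOW, HIGH], i.e. it fits an arithmetic immediate field with that
   range.  */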
3358 arith_immediate_p (rtx op, enum machine_mode mode,
3359 HOST_WIDE_INT low, HOST_WIDE_INT high)
3361 HOST_WIDE_INT val;
3362 unsigned char arr[16];
3363 int bytes, i, j;
3365 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3366 || GET_CODE (op) == CONST_VECTOR);
3368 if (GET_CODE (op) == CONST_VECTOR
3369 && !const_vector_immediate_p (op))
3370 return 0;
3372 if (GET_MODE (op) != VOIDmode)
3373 mode = GET_MODE (op);
3375 constant_to_array (mode, op, arr);
3377 if (VECTOR_MODE_P (mode))
3378 mode = GET_MODE_INNER (mode);
3380 bytes = GET_MODE_SIZE (mode);
3381 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3383 /* Check that bytes are repeated. */
3384 for (i = bytes; i < 16; i += bytes)
3385 for (j = 0; j < bytes; j++)
3386 if (arr[j] != arr[i + j])
3387 return 0;
3389 val = arr[0];
3390 for (j = 1; j < bytes; j++)
3391 val = (val << 8) | arr[j];
3393 val = trunc_int_for_mode (val, mode);
3395 return val >= low && val <= high;
3398 /* TRUE when OP is an immediate that is an exact power of 2; writing
3399 OP as 2^scale, we require LOW <= scale <= HIGH.  When OP is a vector,
3400 all entries must be the same. */
3401 bool
3402 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3404 enum machine_mode int_mode;
3405 HOST_WIDE_INT val;
3406 unsigned char arr[16];
3407 int bytes, i, j;
3409 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3410 || GET_CODE (op) == CONST_VECTOR);
3412 if (GET_CODE (op) == CONST_VECTOR
3413 && !const_vector_immediate_p (op))
3414 return 0;
3416 if (GET_MODE (op) != VOIDmode)
3417 mode = GET_MODE (op);
3419 constant_to_array (mode, op, arr);
3421 if (VECTOR_MODE_P (mode))
3422 mode = GET_MODE_INNER (mode);
3424 bytes = GET_MODE_SIZE (mode);
3425 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3427 /* Check that bytes are repeated. */
3428 for (i = bytes; i < 16; i += bytes)
3429 for (j = 0; j < bytes; j++)
3430 if (arr[j] != arr[i + j])
3431 return 0;
3433 val = arr[0];
3434 for (j = 1; j < bytes; j++)
3435 val = (val << 8) | arr[j];
3437 val = trunc_int_for_mode (val, int_mode);
3439 /* Currently, we only handle SFmode */
3440 gcc_assert (mode == SFmode);
3441 if (mode == SFmode)
3443 int exp = (val >> 23) - 127;
3444 return val > 0 && (val & 0x007fffff) == 0
3445 && exp >= low && exp <= high;
3447 return FALSE;
3450 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3452 static int
3453 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3455 rtx x = *px;
3456 tree decl;
3458 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3460 rtx plus = XEXP (x, 0);
3461 rtx op0 = XEXP (plus, 0);
3462 rtx op1 = XEXP (plus, 1);
3463 if (GET_CODE (op1) == CONST_INT)
3464 x = op0;
3467 return (GET_CODE (x) == SYMBOL_REF
3468 && (decl = SYMBOL_REF_DECL (x)) != 0
3469 && TREE_CODE (decl) == VAR_DECL
3470 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3473 /* We accept:
3474 - any 32-bit constant (SImode, SFmode)
3475 - any constant that can be generated with fsmbi (any mode)
3476 - a 64-bit constant where the high and low bits are identical
3477 (DImode, DFmode)
3478 - a 128-bit constant where the four 32-bit words match. */
3479 bool
3480 spu_legitimate_constant_p (enum machine_mode mode, rtx x)
3482 if (GET_CODE (x) == HIGH)
3483 x = XEXP (x, 0);
3485 /* Reject any __ea qualified reference. These can't appear in
3486 instructions but must be forced to the constant pool. */
3487 if (for_each_rtx (&x, ea_symbol_ref, 0))
3488 return 0;
3490 /* V4SI with all identical symbols is valid. */
3491 if (!flag_pic
3492 && mode == V4SImode
3493 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3494 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3495 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3496 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3497 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3498 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3500 if (GET_CODE (x) == CONST_VECTOR
3501 && !const_vector_immediate_p (x))
3502 return 0;
3503 return 1;
3506 /* Valid addresses are:
3507 - symbol_ref, label_ref, const
3508 - reg
3509 - reg + const_int, where const_int is 16 byte aligned
3510 - reg + reg, alignment doesn't matter
3511 The alignment matters in the reg+const case because lqd and stqd
3512 ignore the 4 least significant bits of the const. We only care about
3513 16 byte modes because the expand phase will change all smaller MEM
3514 references to TImode. */
3515 static bool
3516 spu_legitimate_address_p (enum machine_mode mode,
3517 rtx x, bool reg_ok_strict)
3519 int aligned = GET_MODE_SIZE (mode) >= 16;
3520 if (aligned
3521 && GET_CODE (x) == AND
3522 && GET_CODE (XEXP (x, 1)) == CONST_INT
3523 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3524 x = XEXP (x, 0);
3525 switch (GET_CODE (x))
3527 case LABEL_REF:
3528 return !TARGET_LARGE_MEM;
3530 case SYMBOL_REF:
3531 case CONST:
3532 /* Keep __ea references until reload so that spu_expand_mov can see them
3533 in MEMs. */
3534 if (ea_symbol_ref (&x, 0))
3535 return !reload_in_progress && !reload_completed;
3536 return !TARGET_LARGE_MEM;
3538 case CONST_INT:
3539 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3541 case SUBREG:
3542 x = XEXP (x, 0);
3543 if (REG_P (x))
3544 return 0;
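/* Fall through.  */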
3546 case REG:
3547 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3549 case PLUS:
3550 case LO_SUM:
3552 rtx op0 = XEXP (x, 0);
3553 rtx op1 = XEXP (x, 1);
3554 if (GET_CODE (op0) == SUBREG)
3555 op0 = XEXP (op0, 0);
3556 if (GET_CODE (op1) == SUBREG)
3557 op1 = XEXP (op1, 0);
3558 if (GET_CODE (op0) == REG
3559 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3560 && GET_CODE (op1) == CONST_INT
3561 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3562 /* If virtual registers are involved, the displacement will
3563 change later on anyway, so checking would be premature.
3564 Reload will make sure the final displacement after
3565 register elimination is OK. */
3566 || op0 == arg_pointer_rtx
3567 || op0 == frame_pointer_rtx
3568 || op0 == virtual_stack_vars_rtx)
3569 && (!aligned || (INTVAL (op1) & 15) == 0))
3570 return TRUE;
3571 if (GET_CODE (op0) == REG
3572 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3573 && GET_CODE (op1) == REG
3574 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3575 return TRUE;
3577 break;
3579 default:
3580 break;
3582 return FALSE;
3585 /* Like spu_legitimate_address_p, except with named addresses. */
3586 static bool
3587 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3588 bool reg_ok_strict, addr_space_t as)
3590 if (as == ADDR_SPACE_EA)
3591 return (REG_P (x) && (GET_MODE (x) == EAmode));
3593 else if (as != ADDR_SPACE_GENERIC)
3594 gcc_unreachable ();
3596 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3599 /* When the address is reg + const_int, force the const_int into a
3600 register. */
3601 static rtx
3602 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3603 enum machine_mode mode ATTRIBUTE_UNUSED)
3605 rtx op0, op1;
3606 /* Make sure both operands are registers. */
3607 if (GET_CODE (x) == PLUS)
3609 op0 = XEXP (x, 0);
3610 op1 = XEXP (x, 1);
3611 if (ALIGNED_SYMBOL_REF_P (op0))
3613 op0 = force_reg (Pmode, op0);
3614 mark_reg_pointer (op0, 128);
3616 else if (GET_CODE (op0) != REG)
3617 op0 = force_reg (Pmode, op0);
3618 if (ALIGNED_SYMBOL_REF_P (op1))
3620 op1 = force_reg (Pmode, op1);
3621 mark_reg_pointer (op1, 128);
3623 else if (GET_CODE (op1) != REG)
3624 op1 = force_reg (Pmode, op1);
3625 x = gen_rtx_PLUS (Pmode, op0, op1);
3627 return x;
3630 /* Like spu_legitimize_address, except with named address support.  */
3631 static rtx
3632 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3633 addr_space_t as)
3635 if (as != ADDR_SPACE_GENERIC)
3636 return x;
3638 return spu_legitimize_address (x, oldx, mode);
3641 /* Reload reg + const_int for out-of-range displacements. */
3643 spu_legitimize_reload_address (rtx ad, enum machine_mode mode ATTRIBUTE_UNUSED,
3644 int opnum, int type)
3646 bool removed_and = false;
3648 if (GET_CODE (ad) == AND
3649 && CONST_INT_P (XEXP (ad, 1))
3650 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3652 ad = XEXP (ad, 0);
3653 removed_and = true;
3656 if (GET_CODE (ad) == PLUS
3657 && REG_P (XEXP (ad, 0))
3658 && CONST_INT_P (XEXP (ad, 1))
3659 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3660 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3662 /* Unshare the sum. */
3663 ad = copy_rtx (ad);
3665 /* Reload the displacement. */
3666 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3667 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3668 opnum, (enum reload_type) type);
3670 /* Add back AND for alignment if we stripped it. */
3671 if (removed_and)
3672 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3674 return ad;
3677 return NULL_RTX;
3680 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3681 struct attribute_spec.handler. */
3682 static tree
3683 spu_handle_fndecl_attribute (tree * node,
3684 tree name,
3685 tree args ATTRIBUTE_UNUSED,
3686 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3688 if (TREE_CODE (*node) != FUNCTION_DECL)
3690 warning (0, "%qE attribute only applies to functions",
3691 name);
3692 *no_add_attrs = true;
3695 return NULL_TREE;
3698 /* Handle the "vector" attribute. */
3699 static tree
3700 spu_handle_vector_attribute (tree * node, tree name,
3701 tree args ATTRIBUTE_UNUSED,
3702 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3704 tree type = *node, result = NULL_TREE;
3705 enum machine_mode mode;
3706 int unsigned_p;
3708 while (POINTER_TYPE_P (type)
3709 || TREE_CODE (type) == FUNCTION_TYPE
3710 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3711 type = TREE_TYPE (type);
3713 mode = TYPE_MODE (type);
3715 unsigned_p = TYPE_UNSIGNED (type);
3716 switch (mode)
3718 case DImode:
3719 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3720 break;
3721 case SImode:
3722 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3723 break;
3724 case HImode:
3725 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3726 break;
3727 case QImode:
3728 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3729 break;
3730 case SFmode:
3731 result = V4SF_type_node;
3732 break;
3733 case DFmode:
3734 result = V2DF_type_node;
3735 break;
3736 default:
3737 break;
3740 /* Propagate qualifiers attached to the element type
3741 onto the vector type. */
3742 if (result && result != type && TYPE_QUALS (type))
3743 result = build_qualified_type (result, TYPE_QUALS (type));
3745 *no_add_attrs = true; /* No need to hang on to the attribute. */
3747 if (!result)
3748 warning (0, "%qE attribute ignored", name);
3749 else
3750 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3752 return NULL_TREE;
3755 /* Return nonzero if FUNC is a naked function. */
3756 static int
3757 spu_naked_function_p (tree func)
3759 tree a;
3761 if (TREE_CODE (func) != FUNCTION_DECL)
3762 abort ();
3764 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3765 return a != NULL_TREE;
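/* Return the initial difference between the FROM and TO elimination
   registers, computed from the sizes of the local frame, the outgoing
   argument area and the saved-register area.  */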
3769 spu_initial_elimination_offset (int from, int to)
3771 int saved_regs_size = spu_saved_regs_size ();
3772 int sp_offset = 0;
3773 if (!crtl->is_leaf || crtl->outgoing_args_size
3774 || get_frame_size () || saved_regs_size)
3775 sp_offset = STACK_POINTER_OFFSET;
3776 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3777 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3778 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3779 return get_frame_size ();
3780 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3781 return sp_offset + crtl->outgoing_args_size
3782 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3783 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3784 return get_frame_size () + saved_regs_size + sp_offset;
3785 else
3786 gcc_unreachable ();
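/* Return an rtx describing how a value of TYPE is returned.  Aggregates
   that fit in MAX_REGISTER_RETURN registers are returned left-justified
   in a PARALLEL of return registers; everything else is returned in
   FIRST_RETURN_REGNUM.  */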
3790 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3792 enum machine_mode mode = TYPE_MODE (type);
3793 int byte_size = ((mode == BLKmode)
3794 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3796 /* Make sure small structs are left justified in a register. */
3797 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3798 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3800 enum machine_mode smode;
3801 rtvec v;
3802 int i;
3803 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3804 int n = byte_size / UNITS_PER_WORD;
3805 v = rtvec_alloc (nregs);
3806 for (i = 0; i < n; i++)
3808 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3809 gen_rtx_REG (TImode,
3810 FIRST_RETURN_REGNUM
3811 + i),
3812 GEN_INT (UNITS_PER_WORD * i));
3813 byte_size -= UNITS_PER_WORD;
3816 if (n < nregs)
3818 if (byte_size < 4)
3819 byte_size = 4;
3820 smode =
3821 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3822 RTVEC_ELT (v, n) =
3823 gen_rtx_EXPR_LIST (VOIDmode,
3824 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3825 GEN_INT (UNITS_PER_WORD * n));
3827 return gen_rtx_PARALLEL (mode, v);
3829 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
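/* Return the register in which the next argument is passed, or 0 to
   pass it on the stack.  An argument is never split between registers
   and the stack, and aggregates smaller than a register are
   left-justified within their register.  */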
3832 static rtx
3833 spu_function_arg (cumulative_args_t cum_v,
3834 enum machine_mode mode,
3835 const_tree type, bool named ATTRIBUTE_UNUSED)
3837 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3838 int byte_size;
3840 if (*cum >= MAX_REGISTER_ARGS)
3841 return 0;
3843 byte_size = ((mode == BLKmode)
3844 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3846 /* The ABI does not allow parameters to be passed partially in
3847 a register and partially on the stack.  */
3848 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3849 return 0;
3851 /* Make sure small structs are left justified in a register. */
3852 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3853 && byte_size < UNITS_PER_WORD && byte_size > 0)
3855 enum machine_mode smode;
3856 rtx gr_reg;
3857 if (byte_size < 4)
3858 byte_size = 4;
3859 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3860 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3861 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3862 const0_rtx);
3863 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3865 else
3866 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
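/* Advance CUM past the argument described by MODE and TYPE.  Each
   argument occupies a whole number of 16-byte registers; variable-sized
   types take a single slot for the pointer they are passed by (see
   spu_pass_by_reference below).  */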
3869 static void
3870 spu_function_arg_advance (cumulative_args_t cum_v, enum machine_mode mode,
3871 const_tree type, bool named ATTRIBUTE_UNUSED)
3873 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3875 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3876 ? 1
3877 : mode == BLKmode
3878 ? ((int_size_in_bytes (type) + 15) / 16)
3879 : mode == VOIDmode
3880 ? 1
3881 : HARD_REGNO_NREGS (cum, mode));
3884 /* Variable sized types are passed by reference. */
3885 static bool
3886 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3887 enum machine_mode mode ATTRIBUTE_UNUSED,
3888 const_tree type, bool named ATTRIBUTE_UNUSED)
3890 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3894 /* Var args. */
3896 /* Create and return the va_list datatype.
3898 On SPU, va_list is an array type equivalent to
3900 typedef struct __va_list_tag
3902 void *__args __attribute__((__aligned(16)));
3903 void *__skip __attribute__((__aligned(16)));
3905 } va_list[1];
3907 where __args points to the arg that will be returned by the next
3908 va_arg(), and __skip points to the previous stack frame such that
3909 when __args == __skip we should advance __args by 32 bytes. */
3910 static tree
3911 spu_build_builtin_va_list (void)
3913 tree f_args, f_skip, record, type_decl;
3914 bool owp;
3916 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3918 type_decl =
3919 build_decl (BUILTINS_LOCATION,
3920 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3922 f_args = build_decl (BUILTINS_LOCATION,
3923 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3924 f_skip = build_decl (BUILTINS_LOCATION,
3925 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3927 DECL_FIELD_CONTEXT (f_args) = record;
3928 DECL_ALIGN (f_args) = 128;
3929 DECL_USER_ALIGN (f_args) = 1;
3931 DECL_FIELD_CONTEXT (f_skip) = record;
3932 DECL_ALIGN (f_skip) = 128;
3933 DECL_USER_ALIGN (f_skip) = 1;
3935 TYPE_STUB_DECL (record) = type_decl;
3936 TYPE_NAME (record) = type_decl;
3937 TYPE_FIELDS (record) = f_args;
3938 DECL_CHAIN (f_args) = f_skip;
3940 /* We know this is being padded and we want it that way.  It is an internal
3941 type so hide the warnings from the user. */
3942 owp = warn_padded;
3943 warn_padded = false;
3945 layout_type (record);
3947 warn_padded = owp;
3949 /* The correct type is an array type of one element. */
3950 return build_array_type (record, build_index_type (size_zero_node));
3953 /* Implement va_start by filling the va_list structure VALIST.
3954 NEXTARG points to the first anonymous stack argument.
3956 The following global variables are used to initialize
3957 the va_list structure:
3959 crtl->args.info;
3960 the CUMULATIVE_ARGS for this function
3962 crtl->args.arg_offset_rtx:
3963 holds the offset of the first anonymous stack argument
3964 (relative to the virtual arg pointer). */
3966 static void
3967 spu_va_start (tree valist, rtx nextarg)
3969 tree f_args, f_skip;
3970 tree args, skip, t;
3972 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3973 f_skip = DECL_CHAIN (f_args);
3975 valist = build_simple_mem_ref (valist);
3976 args =
3977 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3978 skip =
3979 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3981 /* Find the __args area. */
3982 t = make_tree (TREE_TYPE (args), nextarg);
3983 if (crtl->args.pretend_args_size > 0)
3984 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3985 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3986 TREE_SIDE_EFFECTS (t) = 1;
3987 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3989 /* Find the __skip area. */
3990 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3991 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3992 - STACK_POINTER_OFFSET));
3993 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3994 TREE_SIDE_EFFECTS (t) = 1;
3995 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3998 /* Gimplify va_arg by updating the va_list structure
3999 VALIST as required to retrieve an argument of type
4000 TYPE, and returning that argument.
4002 ret = va_arg(VALIST, TYPE);
4004 generates code equivalent to:
4006 paddedsize = (sizeof(TYPE) + 15) & -16;
4007 if (VALIST.__args + paddedsize > VALIST.__skip
4008 && VALIST.__args <= VALIST.__skip)
4009 addr = VALIST.__skip + 32;
4010 else
4011 addr = VALIST.__args;
4012 VALIST.__args = addr + paddedsize;
4013 ret = *(TYPE *)addr;
4015 static tree
4016 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4017 gimple_seq * post_p ATTRIBUTE_UNUSED)
4019 tree f_args, f_skip;
4020 tree args, skip;
4021 HOST_WIDE_INT size, rsize;
4022 tree addr, tmp;
4023 bool pass_by_reference_p;
4025 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4026 f_skip = DECL_CHAIN (f_args);
4028 valist = build_simple_mem_ref (valist);
4029 args =
4030 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4031 skip =
4032 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4034 addr = create_tmp_var (ptr_type_node, "va_arg");
4036 /* If an object is dynamically sized, a pointer to it is passed
4037 instead of the object itself. */
4038 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4039 false);
4040 if (pass_by_reference_p)
4041 type = build_pointer_type (type);
4042 size = int_size_in_bytes (type);
4043 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4045 /* build conditional expression to calculate addr. The expression
4046 will be gimplified later. */
4047 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4048 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4049 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4050 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4051 unshare_expr (skip)));
4053 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4054 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4055 unshare_expr (args));
4057 gimplify_assign (addr, tmp, pre_p);
4059 /* update VALIST.__args */
4060 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4061 gimplify_assign (unshare_expr (args), tmp, pre_p);
4063 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4064 addr);
4066 if (pass_by_reference_p)
4067 addr = build_va_arg_indirect_ref (addr);
4069 return build_va_arg_indirect_ref (addr);
4072 /* Save parameter registers starting with the register that corresponds
4073 to the first unnamed parameter.  If the first unnamed parameter is
4074 in the stack then save no registers. Set pretend_args_size to the
4075 amount of space needed to save the registers. */
4076 static void
4077 spu_setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
4078 tree type, int *pretend_size, int no_rtl)
4080 if (!no_rtl)
4082 rtx tmp;
4083 int regno;
4084 int offset;
4085 int ncum = *get_cumulative_args (cum);
4087 /* cum currently points to the last named argument; we want to
4088 start at the next argument. */
4089 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4091 offset = -STACK_POINTER_OFFSET;
4092 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4094 tmp = gen_frame_mem (V4SImode,
4095 plus_constant (Pmode, virtual_incoming_args_rtx,
4096 offset));
4097 emit_move_insn (tmp,
4098 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4099 offset += 16;
4101 *pretend_size = offset + STACK_POINTER_OFFSET;
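/* When compiling PIC code, mark the PIC offset table register as fixed
   and call-used.  */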
4105 static void
4106 spu_conditional_register_usage (void)
4108 if (flag_pic)
4110 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4111 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4115 /* This is called any time we inspect the alignment of a register for
4116 addresses. */
4117 static int
4118 reg_aligned_for_addr (rtx x)
4120 int regno =
4121 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4122 return REGNO_POINTER_ALIGN (regno) >= 128;
4125 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4126 into its SYMBOL_REF_FLAGS. */
4127 static void
4128 spu_encode_section_info (tree decl, rtx rtl, int first)
4130 default_encode_section_info (decl, rtl, first);
4132 /* If a variable has a forced alignment to < 16 bytes, mark it with
4133 SYMBOL_FLAG_ALIGN1. */
4134 if (TREE_CODE (decl) == VAR_DECL
4135 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4136 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4139 /* Return TRUE if we are certain the mem refers to a complete object
4140 which is both 16-byte aligned and padded to a 16-byte boundary. This
4141 would make it safe to store with a single instruction.
4142 We guarantee the alignment and padding for static objects by aligning
4143 all of them to 16 bytes (DATA_ALIGNMENT and CONSTANT_ALIGNMENT).
4144 FIXME: We currently cannot guarantee this for objects on the stack
4145 because assign_parm_setup_stack calls assign_stack_local with the
4146 alignment of the parameter mode and in that case the alignment never
4147 gets adjusted by LOCAL_ALIGNMENT. */
4148 static int
4149 store_with_one_insn_p (rtx mem)
4151 enum machine_mode mode = GET_MODE (mem);
4152 rtx addr = XEXP (mem, 0);
4153 if (mode == BLKmode)
4154 return 0;
4155 if (GET_MODE_SIZE (mode) >= 16)
4156 return 1;
4157 /* Only static objects. */
4158 if (GET_CODE (addr) == SYMBOL_REF)
4160 /* We use the associated declaration to make sure the access is
4161 referring to the whole object.
4162 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4163 if it is necessary. Will there be cases where one exists, and
4164 the other does not? Will there be cases where both exist, but
4165 have different types? */
4166 tree decl = MEM_EXPR (mem);
4167 if (decl
4168 && TREE_CODE (decl) == VAR_DECL
4169 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4170 return 1;
4171 decl = SYMBOL_REF_DECL (addr);
4172 if (decl
4173 && TREE_CODE (decl) == VAR_DECL
4174 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4175 return 1;
4177 return 0;
4180 /* Return 1 when the address is not valid for a simple load and store as
4181 required by the '_mov*' patterns. We could make this less strict
4182 for loads, but we prefer MEMs to look the same so they are more
4183 likely to be merged. */
4184 static int
4185 address_needs_split (rtx mem)
4187 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4188 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4189 || !(store_with_one_insn_p (mem)
4190 || mem_is_padded_component_ref (mem))))
4191 return 1;
4193 return 0;
4196 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4197 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4198 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4200 /* MEM is known to be an __ea qualified memory access. Emit a call to
4201 fetch the PPU memory into local store, and return its address in local
4202 store. */
4204 static void
4205 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4207 if (is_store)
4209 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4210 if (!cache_fetch_dirty)
4211 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4212 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4213 2, ea_addr, EAmode, ndirty, SImode);
4215 else
4217 if (!cache_fetch)
4218 cache_fetch = init_one_libfunc ("__cache_fetch");
4219 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4220 1, ea_addr, EAmode);
4224 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4225 dirty bit marking, inline.
4227 The cache control data structure is an array of
4229 struct __cache_tag_array
4231 unsigned int tag_lo[4];
4232 unsigned int tag_hi[4];
4233 void *data_pointer[4];
4234 int reserved[4];
4235 vector unsigned short dirty_bits[4];
4236 } */
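/* Assuming 32-bit local-store pointers, each entry of that array is 128
   bytes: tag_lo at offset 0, tag_hi at 16, data_pointer at 32, reserved
   at 48 and dirty_bits at 64.  This is why the code below reads the high
   tags at tag_addr+16, the data pointers at tag_addr+32, and indexes the
   dirty bits at offsets 0x40..0x70.  */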
4238 static void
4239 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4241 rtx ea_addr_si;
4242 HOST_WIDE_INT v;
4243 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4244 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4245 rtx index_mask = gen_reg_rtx (SImode);
4246 rtx tag_arr = gen_reg_rtx (Pmode);
4247 rtx splat_mask = gen_reg_rtx (TImode);
4248 rtx splat = gen_reg_rtx (V4SImode);
4249 rtx splat_hi = NULL_RTX;
4250 rtx tag_index = gen_reg_rtx (Pmode);
4251 rtx block_off = gen_reg_rtx (SImode);
4252 rtx tag_addr = gen_reg_rtx (Pmode);
4253 rtx tag = gen_reg_rtx (V4SImode);
4254 rtx cache_tag = gen_reg_rtx (V4SImode);
4255 rtx cache_tag_hi = NULL_RTX;
4256 rtx cache_ptrs = gen_reg_rtx (TImode);
4257 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4258 rtx tag_equal = gen_reg_rtx (V4SImode);
4259 rtx tag_equal_hi = NULL_RTX;
4260 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4261 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4262 rtx eq_index = gen_reg_rtx (SImode);
4263 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4265 if (spu_ea_model != 32)
4267 splat_hi = gen_reg_rtx (V4SImode);
4268 cache_tag_hi = gen_reg_rtx (V4SImode);
4269 tag_equal_hi = gen_reg_rtx (V4SImode);
4272 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4273 emit_move_insn (tag_arr, tag_arr_sym);
4274 v = 0x0001020300010203LL;
4275 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4276 ea_addr_si = ea_addr;
4277 if (spu_ea_model != 32)
4278 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4280 /* tag_index = ea_addr & (tag_array_size - 128) */
4281 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4283 /* splat ea_addr to all 4 slots. */
4284 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4285 /* Similarly for high 32 bits of ea_addr. */
4286 if (spu_ea_model != 32)
4287 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4289 /* block_off = ea_addr & 127 */
4290 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4292 /* tag_addr = tag_arr + tag_index */
4293 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
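/* Worked example, assuming __cache_tag_array_size resolves to 4096: for
   ea_addr 0x12345 the index mask is 4096 - 128 = 0xf80, so tag_index =
   0x12345 & 0xf80 = 0x300, block_off = 0x12345 & 127 = 0x45, and the tag
   compared below is 0x12345 & -128 = 0x12300.  */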
4295 /* Read cache tags. */
4296 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4297 if (spu_ea_model != 32)
4298 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4299 plus_constant (Pmode,
4300 tag_addr, 16)));
4302 /* tag = ea_addr & -128 */
4303 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4305 /* Read all four cache data pointers. */
4306 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4307 plus_constant (Pmode,
4308 tag_addr, 32)));
4310 /* Compare tags. */
4311 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4312 if (spu_ea_model != 32)
4314 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4315 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4318 /* At most one of the tags compares equal, so tag_equal has one
4319 32-bit slot set to all 1's, with the other slots all zero.
4320 gbb picks off the low bit of each byte in the 128-bit register,
4321 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4322 we have a hit. */
4323 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4324 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4326 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4327 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4329 /* This lets us rotate the corresponding cache data pointer into slot 0
4330 (the rotate amount is taken mod 16 bytes). */
4331 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4332 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
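/* For example, a hit in word slot 2 gives tag_eq_pack_si == 0x00f0, clz
   sets eq_index to 24, and rotating cache_ptrs left by 24 (== 8 mod 16)
   bytes moves the pointer stored at bytes 8..11 into the preferred slot
   that the spu_convert just above extracts.  */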
4334 /* Add block offset to form final data address. */
4335 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4337 /* Check that we did hit. */
4338 hit_label = gen_label_rtx ();
4339 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4340 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4341 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4342 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4343 hit_ref, pc_rtx)));
4344 /* Say that this branch is very likely to happen. */
4345 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4346 add_int_reg_note (insn, REG_BR_PROB, v);
4348 ea_load_store (mem, is_store, ea_addr, data_addr);
4349 cont_label = gen_label_rtx ();
4350 emit_jump_insn (gen_jump (cont_label));
4351 emit_barrier ();
4353 emit_label (hit_label);
4355 if (is_store)
4357 HOST_WIDE_INT v_hi;
4358 rtx dirty_bits = gen_reg_rtx (TImode);
4359 rtx dirty_off = gen_reg_rtx (SImode);
4360 rtx dirty_128 = gen_reg_rtx (TImode);
4361 rtx neg_block_off = gen_reg_rtx (SImode);
4363 /* Set up mask with one dirty bit per byte of the mem we are
4364 writing, starting from top bit. */
4365 v_hi = v = -1;
4366 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4367 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4369 v_hi = v;
4370 v = 0;
4372 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
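/* E.g. for a 4-byte store: 128 - 4 = 124, so v = -1 << (124 & 63) sets
   the top four bits of a 64-bit word, and since 124 >= 64 that word
   becomes the high half while the low half is cleared.  The resulting
   TImode mask has one bit set per byte being written, starting from the
   most significant bit.  */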
4374 /* Form index into cache dirty_bits. eq_index is one of
4375 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4376 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4377 offset to each of the four dirty_bits elements. */
4378 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4380 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4382 /* Rotate bit mask to proper bit. */
4383 emit_insn (gen_negsi2 (neg_block_off, block_off));
4384 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4385 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4387 /* Or in the new dirty bits. */
4388 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4390 /* Store. */
4391 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4394 emit_label (cont_label);
4397 static rtx
4398 expand_ea_mem (rtx mem, bool is_store)
4400 rtx ea_addr;
4401 rtx data_addr = gen_reg_rtx (Pmode);
4402 rtx new_mem;
4404 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4405 if (optimize_size || optimize == 0)
4406 ea_load_store (mem, is_store, ea_addr, data_addr);
4407 else
4408 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4410 if (ea_alias_set == -1)
4411 ea_alias_set = new_alias_set ();
4413 /* We generate a new MEM RTX to refer to the copy of the data
4414 in the cache. We do not copy memory attributes (except the
4415 alignment) from the original MEM, as they may no longer apply
4416 to the cache copy. */
4417 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4418 set_mem_alias_set (new_mem, ea_alias_set);
4419 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4421 return new_mem;
4425 spu_expand_mov (rtx * ops, enum machine_mode mode)
4427 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4429 /* Perform the move in the destination SUBREG's inner mode. */
4430 ops[0] = SUBREG_REG (ops[0]);
4431 mode = GET_MODE (ops[0]);
4432 ops[1] = gen_lowpart_common (mode, ops[1]);
4433 gcc_assert (ops[1]);
4436 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4438 rtx from = SUBREG_REG (ops[1]);
4439 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4441 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4442 && GET_MODE_CLASS (imode) == MODE_INT
4443 && subreg_lowpart_p (ops[1]));
4445 if (GET_MODE_SIZE (imode) < 4)
4446 imode = SImode;
4447 if (imode != GET_MODE (from))
4448 from = gen_rtx_SUBREG (imode, from, 0);
4450 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4452 enum insn_code icode = convert_optab_handler (trunc_optab,
4453 mode, imode);
4454 emit_insn (GEN_FCN (icode) (ops[0], from));
4456 else
4457 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4458 return 1;
4461 /* At least one of the operands needs to be a register. */
4462 if ((reload_in_progress | reload_completed) == 0
4463 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4465 rtx temp = force_reg (mode, ops[1]);
4466 emit_move_insn (ops[0], temp);
4467 return 1;
4469 if (reload_in_progress || reload_completed)
4471 if (CONSTANT_P (ops[1]))
4472 return spu_split_immediate (ops);
4473 return 0;
4476 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4477 extend them. */
4478 if (GET_CODE (ops[1]) == CONST_INT)
4480 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4481 if (val != INTVAL (ops[1]))
4483 emit_move_insn (ops[0], GEN_INT (val));
4484 return 1;
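/* E.g. an SImode move of the constant 0x80000000, whose INTVAL is a
   positive HOST_WIDE_INT, is rewritten as a move of -0x80000000, the
   canonical sign-extended SImode value.  */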
4487 if (MEM_P (ops[0]))
4489 if (MEM_ADDR_SPACE (ops[0]))
4490 ops[0] = expand_ea_mem (ops[0], true);
4491 return spu_split_store (ops);
4493 if (MEM_P (ops[1]))
4495 if (MEM_ADDR_SPACE (ops[1]))
4496 ops[1] = expand_ea_mem (ops[1], false);
4497 return spu_split_load (ops);
4500 return 0;
4503 static void
4504 spu_convert_move (rtx dst, rtx src)
4506 enum machine_mode mode = GET_MODE (dst);
4507 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4508 rtx reg;
4509 gcc_assert (GET_MODE (src) == TImode);
4510 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4511 emit_insn (gen_rtx_SET (VOIDmode, reg,
4512 gen_rtx_TRUNCATE (int_mode,
4513 gen_rtx_LSHIFTRT (TImode, src,
4514 GEN_INT (int_mode == DImode ? 64 : 96)))));
4515 if (int_mode != mode)
4517 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4518 emit_move_insn (dst, reg);
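/* The shift amount above selects the preferred slot of the TImode value:
   the top 8 bytes for DImode, otherwise the top 4 bytes, which the
   TRUNCATE then narrows to the requested mode.  */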
4522 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4523 the address from SRC and SRC+16. Return a REG or CONST_INT that
4524 specifies how many bytes to rotate the loaded registers, plus any
4525 extra from EXTRA_ROTQBY. The address and rotate amounts are
4526 normalized to improve merging of loads and rotate computations. */
4527 static rtx
4528 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4530 rtx addr = XEXP (src, 0);
4531 rtx p0, p1, rot, addr0, addr1;
4532 int rot_amt;
4534 rot = 0;
4535 rot_amt = 0;
4537 if (MEM_ALIGN (src) >= 128)
4538 /* Address is already aligned; simply perform a TImode load. */ ;
4539 else if (GET_CODE (addr) == PLUS)
4541 /* 8 cases:
4542 aligned reg + aligned reg => lqx
4543 aligned reg + unaligned reg => lqx, rotqby
4544 aligned reg + aligned const => lqd
4545 aligned reg + unaligned const => lqd, rotqbyi
4546 unaligned reg + aligned reg => lqx, rotqby
4547 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4548 unaligned reg + aligned const => lqd, rotqby
4549 unaligned reg + unaligned const -> not allowed by legitimate address */
4551 p0 = XEXP (addr, 0);
4552 p1 = XEXP (addr, 1);
4553 if (!reg_aligned_for_addr (p0))
4555 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4557 rot = gen_reg_rtx (SImode);
4558 emit_insn (gen_addsi3 (rot, p0, p1));
4560 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4562 if (INTVAL (p1) > 0
4563 && REG_POINTER (p0)
4564 && INTVAL (p1) * BITS_PER_UNIT
4565 < REGNO_POINTER_ALIGN (REGNO (p0)))
4567 rot = gen_reg_rtx (SImode);
4568 emit_insn (gen_addsi3 (rot, p0, p1));
4569 addr = p0;
4571 else
4573 rtx x = gen_reg_rtx (SImode);
4574 emit_move_insn (x, p1);
4575 if (!spu_arith_operand (p1, SImode))
4576 p1 = x;
4577 rot = gen_reg_rtx (SImode);
4578 emit_insn (gen_addsi3 (rot, p0, p1));
4579 addr = gen_rtx_PLUS (Pmode, p0, x);
4582 else
4583 rot = p0;
4585 else
4587 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4589 rot_amt = INTVAL (p1) & 15;
4590 if (INTVAL (p1) & -16)
4592 p1 = GEN_INT (INTVAL (p1) & -16);
4593 addr = gen_rtx_PLUS (SImode, p0, p1);
4595 else
4596 addr = p0;
4598 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4599 rot = p1;
4602 else if (REG_P (addr))
4604 if (!reg_aligned_for_addr (addr))
4605 rot = addr;
4607 else if (GET_CODE (addr) == CONST)
4609 if (GET_CODE (XEXP (addr, 0)) == PLUS
4610 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4611 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4613 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4614 if (rot_amt & -16)
4615 addr = gen_rtx_CONST (Pmode,
4616 gen_rtx_PLUS (Pmode,
4617 XEXP (XEXP (addr, 0), 0),
4618 GEN_INT (rot_amt & -16)));
4619 else
4620 addr = XEXP (XEXP (addr, 0), 0);
4622 else
4624 rot = gen_reg_rtx (Pmode);
4625 emit_move_insn (rot, addr);
4628 else if (GET_CODE (addr) == CONST_INT)
4630 rot_amt = INTVAL (addr);
4631 addr = GEN_INT (rot_amt & -16);
4633 else if (!ALIGNED_SYMBOL_REF_P (addr))
4635 rot = gen_reg_rtx (Pmode);
4636 emit_move_insn (rot, addr);
4639 rot_amt += extra_rotby;
4641 rot_amt &= 15;
4643 if (rot && rot_amt)
4645 rtx x = gen_reg_rtx (SImode);
4646 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4647 rot = x;
4648 rot_amt = 0;
4650 if (!rot && rot_amt)
4651 rot = GEN_INT (rot_amt);
4653 addr0 = copy_rtx (addr);
4654 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4655 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4657 if (dst1)
4659 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4660 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4661 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4664 return rot;
4668 spu_split_load (rtx * ops)
4670 enum machine_mode mode = GET_MODE (ops[0]);
4671 rtx addr, load, rot;
4672 int rot_amt;
4674 if (GET_MODE_SIZE (mode) >= 16)
4675 return 0;
4677 addr = XEXP (ops[1], 0);
4678 gcc_assert (GET_CODE (addr) != AND);
4680 if (!address_needs_split (ops[1]))
4682 ops[1] = change_address (ops[1], TImode, addr);
4683 load = gen_reg_rtx (TImode);
4684 emit_insn (gen__movti (load, ops[1]));
4685 spu_convert_move (ops[0], load);
4686 return 1;
4689 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4691 load = gen_reg_rtx (TImode);
4692 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4694 if (rot)
4695 emit_insn (gen_rotqby_ti (load, load, rot));
4697 spu_convert_move (ops[0], load);
4698 return 1;
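/* Illustrative walk-through, assuming the MEM provides no padding
   information: an SImode load from r+6 with r 16-byte aligned needs a
   split.  spu_expand_load keeps addr == r (6 & -16 is 0) and returns a
   rotate count of 6, so we load the quadword at r, rotate it left by 6
   bytes to bring bytes 6..9 into the preferred slot, and
   spu_convert_move extracts the SImode result.  */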
4702 spu_split_store (rtx * ops)
4704 enum machine_mode mode = GET_MODE (ops[0]);
4705 rtx reg;
4706 rtx addr, p0, p1, p1_lo, smem;
4707 int aform;
4708 int scalar;
4710 if (GET_MODE_SIZE (mode) >= 16)
4711 return 0;
4713 addr = XEXP (ops[0], 0);
4714 gcc_assert (GET_CODE (addr) != AND);
4716 if (!address_needs_split (ops[0]))
4718 reg = gen_reg_rtx (TImode);
4719 emit_insn (gen_spu_convert (reg, ops[1]));
4720 ops[0] = change_address (ops[0], TImode, addr);
4721 emit_move_insn (ops[0], reg);
4722 return 1;
4725 if (GET_CODE (addr) == PLUS)
4727 /* 8 cases:
4728 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4729 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4730 aligned reg + aligned const => lqd, c?d, shuf, stqx
4731 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4732 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4733 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4734 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4735 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx */
4737 aform = 0;
4738 p0 = XEXP (addr, 0);
4739 p1 = p1_lo = XEXP (addr, 1);
4740 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4742 p1_lo = GEN_INT (INTVAL (p1) & 15);
4743 if (reg_aligned_for_addr (p0))
4745 p1 = GEN_INT (INTVAL (p1) & -16);
4746 if (p1 == const0_rtx)
4747 addr = p0;
4748 else
4749 addr = gen_rtx_PLUS (SImode, p0, p1);
4751 else
4753 rtx x = gen_reg_rtx (SImode);
4754 emit_move_insn (x, p1);
4755 addr = gen_rtx_PLUS (SImode, p0, x);
4759 else if (REG_P (addr))
4761 aform = 0;
4762 p0 = addr;
4763 p1 = p1_lo = const0_rtx;
4765 else
4767 aform = 1;
4768 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4769 p1 = 0; /* aform doesn't use p1 */
4770 p1_lo = addr;
4771 if (ALIGNED_SYMBOL_REF_P (addr))
4772 p1_lo = const0_rtx;
4773 else if (GET_CODE (addr) == CONST
4774 && GET_CODE (XEXP (addr, 0)) == PLUS
4775 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4776 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4778 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4779 if ((v & -16) != 0)
4780 addr = gen_rtx_CONST (Pmode,
4781 gen_rtx_PLUS (Pmode,
4782 XEXP (XEXP (addr, 0), 0),
4783 GEN_INT (v & -16)));
4784 else
4785 addr = XEXP (XEXP (addr, 0), 0);
4786 p1_lo = GEN_INT (v & 15);
4788 else if (GET_CODE (addr) == CONST_INT)
4790 p1_lo = GEN_INT (INTVAL (addr) & 15);
4791 addr = GEN_INT (INTVAL (addr) & -16);
4793 else
4795 p1_lo = gen_reg_rtx (SImode);
4796 emit_move_insn (p1_lo, addr);
4800 gcc_assert (aform == 0 || aform == 1);
4801 reg = gen_reg_rtx (TImode);
4803 scalar = store_with_one_insn_p (ops[0]);
4804 if (!scalar)
4806 /* We could copy the flags from the ops[0] MEM to mem here, but we
4807 don't, because we want this load to be optimized away if possible;
4808 copying the flags would prevent that in certain cases, e.g. consider
4809 the volatile flag. */
4811 rtx pat = gen_reg_rtx (TImode);
4812 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4813 set_mem_alias_set (lmem, 0);
4814 emit_insn (gen_movti (reg, lmem));
4816 if (!p0 || reg_aligned_for_addr (p0))
4817 p0 = stack_pointer_rtx;
4818 if (!p1_lo)
4819 p1_lo = const0_rtx;
4821 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4822 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4824 else
4826 if (GET_CODE (ops[1]) == REG)
4827 emit_insn (gen_spu_convert (reg, ops[1]));
4828 else if (GET_CODE (ops[1]) == SUBREG)
4829 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4830 else
4831 abort ();
4834 if (GET_MODE_SIZE (mode) < 4 && scalar)
4835 emit_insn (gen_ashlti3
4836 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4838 smem = change_address (ops[0], TImode, copy_rtx (addr));
4839 /* We can't use the previous alias set because the memory has changed
4840 size and can potentially overlap objects of other types. */
4841 set_mem_alias_set (smem, 0);
4843 emit_insn (gen_movti (smem, reg));
4844 return 1;
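/* Illustrative walk-through of the !scalar path: for an SImode store to
   r+6 we load the surrounding quadword, cpat builds an insertion mask
   for 4 bytes at offset p1_lo == 6, shufb merges the new value into the
   loaded quadword, and the movti to smem just above writes the merged
   quadword back with a single stq.  */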
4847 /* Return TRUE if X is a MEM which is a struct member reference
4848 and the member can safely be loaded and stored with a single
4849 instruction because it is padded. */
4850 static int
4851 mem_is_padded_component_ref (rtx x)
4853 tree t = MEM_EXPR (x);
4854 tree r;
4855 if (!t || TREE_CODE (t) != COMPONENT_REF)
4856 return 0;
4857 t = TREE_OPERAND (t, 1);
4858 if (!t || TREE_CODE (t) != FIELD_DECL
4859 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4860 return 0;
4861 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4862 r = DECL_FIELD_CONTEXT (t);
4863 if (!r || TREE_CODE (r) != RECORD_TYPE)
4864 return 0;
4865 /* Make sure they are the same mode */
4866 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4867 return 0;
4868 /* If there are no following fields then the field alignment assures
4869 the structure is padded to the alignment which means this field is
4870 padded too. */
4871 if (TREE_CHAIN (t) == 0)
4872 return 1;
4873 /* If the following field is also aligned then this field will be
4874 padded. */
4875 t = TREE_CHAIN (t);
4876 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4877 return 1;
4878 return 0;
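/* An illustrative case (hypothetical types, not from the sources):

     struct s { int a __attribute__ ((aligned (16)));
                int b __attribute__ ((aligned (16))); };

   An SImode access to s.a satisfies the checks above because the padding
   up to the aligned field b guarantees that the quadword containing a
   lies entirely within the structure, so a single lqd/stqd is safe.  */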
4881 /* Parse the -mfixed-range= option string. */
4882 static void
4883 fix_range (const char *const_str)
4885 int i, first, last;
4886 char *str, *dash, *comma;
4888 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4889 REG2 are either register names or register numbers. The effect
4890 of this option is to mark the registers in the range from REG1 to
4891 REG2 as ``fixed'' so they won't be used by the compiler. */
4893 i = strlen (const_str);
4894 str = (char *) alloca (i + 1);
4895 memcpy (str, const_str, i + 1);
4897 while (1)
4899 dash = strchr (str, '-');
4900 if (!dash)
4902 warning (0, "value of -mfixed-range must have form REG1-REG2");
4903 return;
4905 *dash = '\0';
4906 comma = strchr (dash + 1, ',');
4907 if (comma)
4908 *comma = '\0';
4910 first = decode_reg_name (str);
4911 if (first < 0)
4913 warning (0, "unknown register name: %s", str);
4914 return;
4917 last = decode_reg_name (dash + 1);
4918 if (last < 0)
4920 warning (0, "unknown register name: %s", dash + 1);
4921 return;
4924 *dash = '-';
4926 if (first > last)
4928 warning (0, "%s-%s is an empty range", str, dash + 1);
4929 return;
4932 for (i = first; i <= last; ++i)
4933 fixed_regs[i] = call_used_regs[i] = 1;
4935 if (!comma)
4936 break;
4938 *comma = ',';
4939 str = comma + 1;
4943 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4944 can be generated using the fsmbi instruction. */
4946 fsmbi_const_p (rtx x)
4948 if (CONSTANT_P (x))
4950 /* We can always choose TImode for CONST_INT because the high bits
4951 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4952 enum immediate_class c = classify_immediate (x, TImode);
4953 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4955 return 0;
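/* For example, every byte of the V4SI constant { 0, -1, 0, -1 } is
   either 0x00 or 0xff, so a single fsmbi (whose 16-bit immediate selects
   0x00 or 0xff for each result byte) can materialize it.  */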
4958 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4959 can be generated using the cbd, chd, cwd or cdd instruction. */
4961 cpat_const_p (rtx x, enum machine_mode mode)
4963 if (CONSTANT_P (x))
4965 enum immediate_class c = classify_immediate (x, mode);
4966 return c == IC_CPAT;
4968 return 0;
4972 gen_cpat_const (rtx * ops)
4974 unsigned char dst[16];
4975 int i, offset, shift, isize;
4976 if (GET_CODE (ops[3]) != CONST_INT
4977 || GET_CODE (ops[2]) != CONST_INT
4978 || (GET_CODE (ops[1]) != CONST_INT
4979 && GET_CODE (ops[1]) != REG))
4980 return 0;
4981 if (GET_CODE (ops[1]) == REG
4982 && (!REG_POINTER (ops[1])
4983 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4984 return 0;
4986 for (i = 0; i < 16; i++)
4987 dst[i] = i + 16;
4988 isize = INTVAL (ops[3]);
4989 if (isize == 1)
4990 shift = 3;
4991 else if (isize == 2)
4992 shift = 2;
4993 else
4994 shift = 0;
4995 offset = (INTVAL (ops[2]) +
4996 (GET_CODE (ops[1]) ==
4997 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4998 for (i = 0; i < isize; i++)
4999 dst[offset + i] = i + shift;
5000 return array_to_constant (TImode, dst);
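/* Worked example: ops = { dst, aligned base reg, offset 4, size 4 }
   gives shift 0 and offset 4, so the pattern is the identity bytes
   16..31 with bytes 4..7 replaced by 0..3, i.e. a shufb control that
   inserts the new word at byte offset 4 (what cwd would generate at
   run time).  */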
5003 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16-byte
5004 array. Use MODE for CONST_INTs. When the constant's mode is smaller
5005 than 16 bytes, the value is repeated across the rest of the array. */
5006 void
5007 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5009 HOST_WIDE_INT val;
5010 int i, j, first;
5012 memset (arr, 0, 16);
5013 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5014 if (GET_CODE (x) == CONST_INT
5015 || (GET_CODE (x) == CONST_DOUBLE
5016 && (mode == SFmode || mode == DFmode)))
5018 gcc_assert (mode != VOIDmode && mode != BLKmode);
5020 if (GET_CODE (x) == CONST_DOUBLE)
5021 val = const_double_to_hwint (x);
5022 else
5023 val = INTVAL (x);
5024 first = GET_MODE_SIZE (mode) - 1;
5025 for (i = first; i >= 0; i--)
5027 arr[i] = val & 0xff;
5028 val >>= 8;
5030 /* Splat the constant across the whole array. */
5031 for (j = 0, i = first + 1; i < 16; i++)
5033 arr[i] = arr[j];
5034 j = (j == first) ? 0 : j + 1;
5037 else if (GET_CODE (x) == CONST_DOUBLE)
5039 val = CONST_DOUBLE_LOW (x);
5040 for (i = 15; i >= 8; i--)
5042 arr[i] = val & 0xff;
5043 val >>= 8;
5045 val = CONST_DOUBLE_HIGH (x);
5046 for (i = 7; i >= 0; i--)
5048 arr[i] = val & 0xff;
5049 val >>= 8;
5052 else if (GET_CODE (x) == CONST_VECTOR)
5054 int units;
5055 rtx elt;
5056 mode = GET_MODE_INNER (mode);
5057 units = CONST_VECTOR_NUNITS (x);
5058 for (i = 0; i < units; i++)
5060 elt = CONST_VECTOR_ELT (x, i);
5061 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5063 if (GET_CODE (elt) == CONST_DOUBLE)
5064 val = const_double_to_hwint (elt);
5065 else
5066 val = INTVAL (elt);
5067 first = GET_MODE_SIZE (mode) - 1;
5068 if (first + i * GET_MODE_SIZE (mode) > 16)
5069 abort ();
5070 for (j = first; j >= 0; j--)
5072 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5073 val >>= 8;
5078 else
5079 gcc_unreachable();
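/* For instance, the SImode constant 0x01020304 fills arr[0..3] with
   01 02 03 04 and the splat loop repeats that pattern through arr[15],
   describing the value replicated across the whole quadword.  */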
5082 /* Convert a 16-byte array to a constant of mode MODE. When MODE is
5083 smaller than 16 bytes, use the bytes that would represent that value
5084 in a register, e.g., for QImode return the value of arr[3]. */
5086 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5088 enum machine_mode inner_mode;
5089 rtvec v;
5090 int units, size, i, j, k;
5091 HOST_WIDE_INT val;
5093 if (GET_MODE_CLASS (mode) == MODE_INT
5094 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5096 j = GET_MODE_SIZE (mode);
5097 i = j < 4 ? 4 - j : 0;
5098 for (val = 0; i < j; i++)
5099 val = (val << 8) | arr[i];
5100 val = trunc_int_for_mode (val, mode);
5101 return GEN_INT (val);
5104 if (mode == TImode)
5106 HOST_WIDE_INT high;
5107 for (i = high = 0; i < 8; i++)
5108 high = (high << 8) | arr[i];
5109 for (i = 8, val = 0; i < 16; i++)
5110 val = (val << 8) | arr[i];
5111 return immed_double_const (val, high, TImode);
5113 if (mode == SFmode)
5115 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5116 val = trunc_int_for_mode (val, SImode);
5117 return hwint_to_const_double (SFmode, val);
5119 if (mode == DFmode)
5121 for (i = 0, val = 0; i < 8; i++)
5122 val = (val << 8) | arr[i];
5123 return hwint_to_const_double (DFmode, val);
5126 if (!VECTOR_MODE_P (mode))
5127 abort ();
5129 units = GET_MODE_NUNITS (mode);
5130 size = GET_MODE_UNIT_SIZE (mode);
5131 inner_mode = GET_MODE_INNER (mode);
5132 v = rtvec_alloc (units);
5134 for (k = i = 0; i < units; ++i)
5136 val = 0;
5137 for (j = 0; j < size; j++, k++)
5138 val = (val << 8) | arr[k];
5140 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5141 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5142 else
5143 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5145 if (k > 16)
5146 abort ();
5148 return gen_rtx_CONST_VECTOR (mode, v);
5151 static void
5152 reloc_diagnostic (rtx x)
5154 tree decl = 0;
5155 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5156 return;
5158 if (GET_CODE (x) == SYMBOL_REF)
5159 decl = SYMBOL_REF_DECL (x);
5160 else if (GET_CODE (x) == CONST
5161 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5162 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5164 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5165 if (decl && !DECL_P (decl))
5166 decl = 0;
5168 /* The decl could be a string constant. */
5169 if (decl && DECL_P (decl))
5171 location_t loc;
5172 /* We use last_assemble_variable_decl to get line information. It's
5173 not always going to be right and might not even be close, but will
5174 be right for the more common cases. */
5175 if (!last_assemble_variable_decl || in_section == ctors_section)
5176 loc = DECL_SOURCE_LOCATION (decl);
5177 else
5178 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5180 if (TARGET_WARN_RELOC)
5181 warning_at (loc, 0,
5182 "creating run-time relocation for %qD", decl);
5183 else
5184 error_at (loc,
5185 "creating run-time relocation for %qD", decl);
5187 else
5189 if (TARGET_WARN_RELOC)
5190 warning_at (input_location, 0, "creating run-time relocation");
5191 else
5192 error_at (input_location, "creating run-time relocation");
5196 /* Hook into assemble_integer so we can generate an error for run-time
5197 relocations. The SPU ABI disallows them. */
5198 static bool
5199 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5201 /* By default run-time relocations aren't supported, but we allow them
5202 in case users support them in their own run-time loader, and we provide
5203 a warning for those users who don't. */
5204 if ((GET_CODE (x) == SYMBOL_REF)
5205 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5206 reloc_diagnostic (x);
5208 return default_assemble_integer (x, size, aligned_p);
5211 static void
5212 spu_asm_globalize_label (FILE * file, const char *name)
5214 fputs ("\t.global\t", file);
5215 assemble_name (file, name);
5216 fputs ("\n", file);
5219 static bool
5220 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED,
5221 int opno ATTRIBUTE_UNUSED, int *total,
5222 bool speed ATTRIBUTE_UNUSED)
5224 enum machine_mode mode = GET_MODE (x);
5225 int cost = COSTS_N_INSNS (2);
5227 /* Folding to a CONST_VECTOR will use extra space but there might
5228 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5229 only if it allows us to fold away multiple insns. Changing the cost
5230 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5231 because this cost will only be compared against a single insn.
5232 if (code == CONST_VECTOR)
5233 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6); */
5236 /* Use defaults for float operations. Not accurate but good enough. */
5237 if (mode == DFmode)
5239 *total = COSTS_N_INSNS (13);
5240 return true;
5242 if (mode == SFmode)
5244 *total = COSTS_N_INSNS (6);
5245 return true;
5247 switch (code)
5249 case CONST_INT:
5250 if (satisfies_constraint_K (x))
5251 *total = 0;
5252 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5253 *total = COSTS_N_INSNS (1);
5254 else
5255 *total = COSTS_N_INSNS (3);
5256 return true;
5258 case CONST:
5259 *total = COSTS_N_INSNS (3);
5260 return true;
5262 case LABEL_REF:
5263 case SYMBOL_REF:
5264 *total = COSTS_N_INSNS (0);
5265 return true;
5267 case CONST_DOUBLE:
5268 *total = COSTS_N_INSNS (5);
5269 return true;
5271 case FLOAT_EXTEND:
5272 case FLOAT_TRUNCATE:
5273 case FLOAT:
5274 case UNSIGNED_FLOAT:
5275 case FIX:
5276 case UNSIGNED_FIX:
5277 *total = COSTS_N_INSNS (7);
5278 return true;
5280 case PLUS:
5281 if (mode == TImode)
5283 *total = COSTS_N_INSNS (9);
5284 return true;
5286 break;
5288 case MULT:
5289 cost =
5290 GET_CODE (XEXP (x, 0)) ==
5291 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5292 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5294 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5296 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5297 cost = COSTS_N_INSNS (14);
5298 if ((val & 0xffff) == 0)
5299 cost = COSTS_N_INSNS (9);
5300 else if (val > 0 && val < 0x10000)
5301 cost = COSTS_N_INSNS (11);
5304 *total = cost;
5305 return true;
5306 case DIV:
5307 case UDIV:
5308 case MOD:
5309 case UMOD:
5310 *total = COSTS_N_INSNS (20);
5311 return true;
5312 case ROTATE:
5313 case ROTATERT:
5314 case ASHIFT:
5315 case ASHIFTRT:
5316 case LSHIFTRT:
5317 *total = COSTS_N_INSNS (4);
5318 return true;
5319 case UNSPEC:
5320 if (XINT (x, 1) == UNSPEC_CONVERT)
5321 *total = COSTS_N_INSNS (0);
5322 else
5323 *total = COSTS_N_INSNS (4);
5324 return true;
5326 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5327 if (GET_MODE_CLASS (mode) == MODE_INT
5328 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5329 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5330 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5331 *total = cost;
5332 return true;
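/* E.g. with the quadratic scaling above, a DImode integer operation is
   costed at 4x its SImode equivalent and a TImode one at 16x, roughly
   reflecting the extra instructions needed to emulate wider
   arithmetic.  */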
5335 static enum machine_mode
5336 spu_unwind_word_mode (void)
5338 return SImode;
5341 /* Decide whether we can make a sibling call to a function. DECL is the
5342 declaration of the function being targeted by the call and EXP is the
5343 CALL_EXPR representing the call. */
5344 static bool
5345 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5347 return decl && !TARGET_LARGE_MEM;
5350 /* We need to correctly update the back chain pointer and the Available
5351 Stack Size (which is in the second slot of the sp register). */
5352 void
5353 spu_allocate_stack (rtx op0, rtx op1)
5355 HOST_WIDE_INT v;
5356 rtx chain = gen_reg_rtx (V4SImode);
5357 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5358 rtx sp = gen_reg_rtx (V4SImode);
5359 rtx splatted = gen_reg_rtx (V4SImode);
5360 rtx pat = gen_reg_rtx (TImode);
5362 /* copy the back chain so we can save it back again. */
5363 emit_move_insn (chain, stack_bot);
5365 op1 = force_reg (SImode, op1);
5367 v = 0x1020300010203ll;
5368 emit_move_insn (pat, immed_double_const (v, v, TImode));
5369 emit_insn (gen_shufb (splatted, op1, op1, pat));
5371 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5372 emit_insn (gen_subv4si3 (sp, sp, splatted));
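/* The 0x00010203 pattern replicated across the quadword splats the
   SImode size in op1 into every word, so the subv4si above decrements
   every slot of the copied stack-pointer register, in particular the
   stack pointer in slot 0 and the Available Stack Size in slot 1, by
   the same amount.  */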
5374 if (flag_stack_check)
5376 rtx avail = gen_reg_rtx(SImode);
5377 rtx result = gen_reg_rtx(SImode);
5378 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5379 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5380 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5383 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5385 emit_move_insn (stack_bot, chain);
5387 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5390 void
5391 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5393 static unsigned char arr[16] =
5394 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5395 rtx temp = gen_reg_rtx (SImode);
5396 rtx temp2 = gen_reg_rtx (SImode);
5397 rtx temp3 = gen_reg_rtx (V4SImode);
5398 rtx temp4 = gen_reg_rtx (V4SImode);
5399 rtx pat = gen_reg_rtx (TImode);
5400 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5402 /* Restore the backchain from the first word, sp from the second. */
5403 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5404 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5406 emit_move_insn (pat, array_to_constant (TImode, arr));
5408 /* Compute Available Stack Size for sp */
5409 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5410 emit_insn (gen_shufb (temp3, temp, temp, pat));
5412 /* Compute Available Stack Size for back chain */
5413 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5414 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5415 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5417 emit_insn (gen_addv4si3 (sp, sp, temp3));
5418 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5421 static void
5422 spu_init_libfuncs (void)
5424 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5425 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5426 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5427 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5428 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5429 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5430 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5431 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5432 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5433 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5434 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5435 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5437 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5438 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5440 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5441 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5442 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5443 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5444 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5445 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5446 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5447 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5448 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5449 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5450 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5451 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5453 set_optab_libfunc (smul_optab, TImode, "__multi3");
5454 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5455 set_optab_libfunc (smod_optab, TImode, "__modti3");
5456 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5457 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5458 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5461 /* Make a subreg, stripping any existing subreg. We could possibly just
5462 call simplify_subreg, but in this case we know what we want. */
5464 spu_gen_subreg (enum machine_mode mode, rtx x)
5466 if (GET_CODE (x) == SUBREG)
5467 x = SUBREG_REG (x);
5468 if (GET_MODE (x) == mode)
5469 return x;
5470 return gen_rtx_SUBREG (mode, x, 0);
5473 static bool
5474 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5476 return (TYPE_MODE (type) == BLKmode
5477 && ((type) == 0
5478 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5479 || int_size_in_bytes (type) >
5480 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5483 /* Create the built-in types and functions */
5485 enum spu_function_code
5487 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5488 #include "spu-builtins.def"
5489 #undef DEF_BUILTIN
5490 NUM_SPU_BUILTINS
5493 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5495 struct spu_builtin_description spu_builtins[] = {
5496 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5497 {fcode, icode, name, type, params},
5498 #include "spu-builtins.def"
5499 #undef DEF_BUILTIN
5502 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5504 /* Returns the spu builtin decl for CODE. */
5506 static tree
5507 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5509 if (code >= NUM_SPU_BUILTINS)
5510 return error_mark_node;
5512 return spu_builtin_decls[code];
5516 static void
5517 spu_init_builtins (void)
5519 struct spu_builtin_description *d;
5520 unsigned int i;
5522 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5523 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5524 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5525 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5526 V4SF_type_node = build_vector_type (float_type_node, 4);
5527 V2DF_type_node = build_vector_type (double_type_node, 2);
5529 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5530 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5531 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5532 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5534 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5536 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5537 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5538 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5539 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5540 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5541 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5542 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5543 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5544 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5545 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5546 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5547 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5550 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5551 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5553 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5554 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5555 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5558 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5559 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5561 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5563 spu_builtin_types[SPU_BTI_PTR] =
5564 build_pointer_type (build_qualified_type
5565 (void_type_node,
5566 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5568 /* For each builtin we build a new prototype. The tree code will make
5569 sure nodes are shared. */
5570 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5572 tree p;
5573 char name[64]; /* build_function will make a copy. */
5574 int parm;
5576 if (d->name == 0)
5577 continue;
5579 /* Find last parm. */
5580 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5583 p = void_list_node;
5584 while (parm > 1)
5585 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5587 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5589 sprintf (name, "__builtin_%s", d->name);
5590 spu_builtin_decls[i] =
5591 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5592 if (d->fcode == SPU_MASK_FOR_LOAD)
5593 TREE_READONLY (spu_builtin_decls[i]) = 1;
5595 /* These builtins don't throw. */
5596 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5600 void
5601 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5603 static unsigned char arr[16] =
5604 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5606 rtx temp = gen_reg_rtx (Pmode);
5607 rtx temp2 = gen_reg_rtx (V4SImode);
5608 rtx temp3 = gen_reg_rtx (V4SImode);
5609 rtx pat = gen_reg_rtx (TImode);
5610 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5612 emit_move_insn (pat, array_to_constant (TImode, arr));
5614 /* Restore the sp. */
5615 emit_move_insn (temp, op1);
5616 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5618 /* Compute available stack size for sp. */
5619 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5620 emit_insn (gen_shufb (temp3, temp, temp, pat));
5622 emit_insn (gen_addv4si3 (sp, sp, temp3));
5623 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5627 spu_safe_dma (HOST_WIDE_INT channel)
5629 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5632 void
5633 spu_builtin_splats (rtx ops[])
5635 enum machine_mode mode = GET_MODE (ops[0]);
5636 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5638 unsigned char arr[16];
5639 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5640 emit_move_insn (ops[0], array_to_constant (mode, arr));
5642 else
5644 rtx reg = gen_reg_rtx (TImode);
5645 rtx shuf;
5646 if (GET_CODE (ops[1]) != REG
5647 && GET_CODE (ops[1]) != SUBREG)
5648 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5649 switch (mode)
5651 case V2DImode:
5652 case V2DFmode:
5653 shuf =
5654 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5655 TImode);
5656 break;
5657 case V4SImode:
5658 case V4SFmode:
5659 shuf =
5660 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5661 TImode);
5662 break;
5663 case V8HImode:
5664 shuf =
5665 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5666 TImode);
5667 break;
5668 case V16QImode:
5669 shuf =
5670 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5671 TImode);
5672 break;
5673 default:
5674 abort ();
5676 emit_move_insn (reg, shuf);
5677 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
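/* The shuffle constants above replicate the preferred slot of ops[1]
   into every element: e.g. the V4SI/V4SF pattern repeats bytes 0..3
   into each word, while the V8HI pattern repeats bytes 2..3, the
   preferred slot of a halfword.  */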
5681 void
5682 spu_builtin_extract (rtx ops[])
5684 enum machine_mode mode;
5685 rtx rot, from, tmp;
5687 mode = GET_MODE (ops[1]);
5689 if (GET_CODE (ops[2]) == CONST_INT)
5691 switch (mode)
5693 case V16QImode:
5694 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5695 break;
5696 case V8HImode:
5697 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5698 break;
5699 case V4SFmode:
5700 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5701 break;
5702 case V4SImode:
5703 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5704 break;
5705 case V2DImode:
5706 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5707 break;
5708 case V2DFmode:
5709 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5710 break;
5711 default:
5712 abort ();
5714 return;
5717 from = spu_gen_subreg (TImode, ops[1]);
5718 rot = gen_reg_rtx (TImode);
5719 tmp = gen_reg_rtx (SImode);
5721 switch (mode)
5723 case V16QImode:
5724 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5725 break;
5726 case V8HImode:
5727 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5728 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5729 break;
5730 case V4SFmode:
5731 case V4SImode:
5732 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5733 break;
5734 case V2DImode:
5735 case V2DFmode:
5736 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5737 break;
5738 default:
5739 abort ();
5741 emit_insn (gen_rotqby_ti (rot, from, tmp));
5743 emit_insn (gen_spu_convert (ops[0], rot));
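/* For a variable index the arithmetic above computes the byte offset
   of element N minus the offset of the preferred slot, e.g. 2*N - 2
   for V8HImode, so the rotqby brings element N into the preferred
   slot that spu_convert reads.  */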
5746 void
5747 spu_builtin_insert (rtx ops[])
5749 enum machine_mode mode = GET_MODE (ops[0]);
5750 enum machine_mode imode = GET_MODE_INNER (mode);
5751 rtx mask = gen_reg_rtx (TImode);
5752 rtx offset;
5754 if (GET_CODE (ops[3]) == CONST_INT)
5755 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5756 else
5758 offset = gen_reg_rtx (SImode);
5759 emit_insn (gen_mulsi3
5760 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5762 emit_insn (gen_cpat
5763 (mask, stack_pointer_rtx, offset,
5764 GEN_INT (GET_MODE_SIZE (imode))));
5765 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5768 void
5769 spu_builtin_promote (rtx ops[])
5771 enum machine_mode mode, imode;
5772 rtx rot, from, offset;
5773 HOST_WIDE_INT pos;
5775 mode = GET_MODE (ops[0]);
5776 imode = GET_MODE_INNER (mode);
5778 from = gen_reg_rtx (TImode);
5779 rot = spu_gen_subreg (TImode, ops[0]);
5781 emit_insn (gen_spu_convert (from, ops[1]));
5783 if (GET_CODE (ops[2]) == CONST_INT)
5785 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5786 if (GET_MODE_SIZE (imode) < 4)
5787 pos += 4 - GET_MODE_SIZE (imode);
5788 offset = GEN_INT (pos & 15);
5790 else
5792 offset = gen_reg_rtx (SImode);
5793 switch (mode)
5795 case V16QImode:
5796 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5797 break;
5798 case V8HImode:
5799 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5800 emit_insn (gen_addsi3 (offset, offset, offset));
5801 break;
5802 case V4SFmode:
5803 case V4SImode:
5804 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5805 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5806 break;
5807 case V2DImode:
5808 case V2DFmode:
5809 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5810 break;
5811 default:
5812 abort ();
5815 emit_insn (gen_rotqby_ti (rot, from, offset));
5818 static void
5819 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5821 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5822 rtx shuf = gen_reg_rtx (V4SImode);
5823 rtx insn = gen_reg_rtx (V4SImode);
5824 rtx shufc;
5825 rtx insnc;
5826 rtx mem;
5828 fnaddr = force_reg (SImode, fnaddr);
5829 cxt = force_reg (SImode, cxt);
5831 if (TARGET_LARGE_MEM)
5833 rtx rotl = gen_reg_rtx (V4SImode);
5834 rtx mask = gen_reg_rtx (V4SImode);
5835 rtx bi = gen_reg_rtx (SImode);
5836 static unsigned char const shufa[16] = {
5837 2, 3, 0, 1, 18, 19, 16, 17,
5838 0, 1, 2, 3, 16, 17, 18, 19
5840 static unsigned char const insna[16] = {
5841 0x41, 0, 0, 79,
5842 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5843 0x60, 0x80, 0, 79,
5844 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5847 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5848 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5850 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5851 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5852 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5853 emit_insn (gen_selb (insn, insnc, rotl, mask));
5855 mem = adjust_address (m_tramp, V4SImode, 0);
5856 emit_move_insn (mem, insn);
5858 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5859 mem = adjust_address (m_tramp, Pmode, 16);
5860 emit_move_insn (mem, bi);
5862 else
5864 rtx scxt = gen_reg_rtx (SImode);
5865 rtx sfnaddr = gen_reg_rtx (SImode);
5866 static unsigned char const insna[16] = {
5867 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5868 0x30, 0, 0, 0,
5869 0, 0, 0, 0,
5870 0, 0, 0, 0
5873 shufc = gen_reg_rtx (TImode);
5874 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5876 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5877 fits 18 bits and the last 4 are zeros. This will be true if
5878 the stack pointer is initialized to 0x3fff0 at program start;
5879 otherwise the ila instruction will be garbage. */
5881 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5882 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5883 emit_insn (gen_cpat
5884 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5885 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5886 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5888 mem = adjust_address (m_tramp, V4SImode, 0);
5889 emit_move_insn (mem, insn);
5891 emit_insn (gen_sync ());
5894 static bool
5895 spu_warn_func_return (tree decl)
5897 /* Naked functions are implemented entirely in assembly, including the
5898 return sequence, so suppress warnings about this. */
5899 return !spu_naked_function_p (decl);
5902 void
5903 spu_expand_sign_extend (rtx ops[])
5905 unsigned char arr[16];
5906 rtx pat = gen_reg_rtx (TImode);
5907 rtx sign, c;
5908 int i, last;
5909 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5910 if (GET_MODE (ops[1]) == QImode)
5912 sign = gen_reg_rtx (HImode);
5913 emit_insn (gen_extendqihi2 (sign, ops[1]));
5914 for (i = 0; i < 16; i++)
5915 arr[i] = 0x12;
5916 arr[last] = 0x13;
5918 else
5920 for (i = 0; i < 16; i++)
5921 arr[i] = 0x10;
5922 switch (GET_MODE (ops[1]))
5924 case HImode:
5925 sign = gen_reg_rtx (SImode);
5926 emit_insn (gen_extendhisi2 (sign, ops[1]));
5927 arr[last] = 0x03;
5928 arr[last - 1] = 0x02;
5929 break;
5930 case SImode:
5931 sign = gen_reg_rtx (SImode);
5932 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5933 for (i = 0; i < 4; i++)
5934 arr[last - i] = 3 - i;
5935 break;
5936 case DImode:
5937 sign = gen_reg_rtx (SImode);
5938 c = gen_reg_rtx (SImode);
5939 emit_insn (gen_spu_convert (c, ops[1]));
5940 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5941 for (i = 0; i < 8; i++)
5942 arr[last - i] = 7 - i;
5943 break;
5944 default:
5945 abort ();
5948 emit_move_insn (pat, array_to_constant (TImode, arr));
5949 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5952 /* expand vector initialization. If there are any constant parts,
5953 load constant parts first. Then load any non-constant parts. */
5954 void
5955 spu_expand_vector_init (rtx target, rtx vals)
5957 enum machine_mode mode = GET_MODE (target);
5958 int n_elts = GET_MODE_NUNITS (mode);
5959 int n_var = 0;
5960 bool all_same = true;
5961 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5962 int i;
5964 first = XVECEXP (vals, 0, 0);
5965 for (i = 0; i < n_elts; ++i)
5967 x = XVECEXP (vals, 0, i);
5968 if (!(CONST_INT_P (x)
5969 || GET_CODE (x) == CONST_DOUBLE
5970 || GET_CODE (x) == CONST_FIXED))
5971 ++n_var;
5972 else
5974 if (first_constant == NULL_RTX)
5975 first_constant = x;
5977 if (i > 0 && !rtx_equal_p (x, first))
5978 all_same = false;
5981 /* if all elements are the same, use splats to repeat elements */
5982 if (all_same)
5984 if (!CONSTANT_P (first)
5985 && !register_operand (first, GET_MODE (x)))
5986 first = force_reg (GET_MODE (first), first);
5987 emit_insn (gen_spu_splats (target, first));
5988 return;
5991 /* load constant parts */
5992 if (n_var != n_elts)
5994 if (n_var == 0)
5996 emit_move_insn (target,
5997 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5999 else
6001 rtx constant_parts_rtx = copy_rtx (vals);
6003 gcc_assert (first_constant != NULL_RTX);
6004 /* fill empty slots with the first constant; this increases
6005 our chance of using splats in the recursive call below. */
6006 for (i = 0; i < n_elts; ++i)
6008 x = XVECEXP (constant_parts_rtx, 0, i);
6009 if (!(CONST_INT_P (x)
6010 || GET_CODE (x) == CONST_DOUBLE
6011 || GET_CODE (x) == CONST_FIXED))
6012 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6015 spu_expand_vector_init (target, constant_parts_rtx);
6019 /* load variable parts */
6020 if (n_var != 0)
6022 rtx insert_operands[4];
6024 insert_operands[0] = target;
6025 insert_operands[2] = target;
6026 for (i = 0; i < n_elts; ++i)
6028 x = XVECEXP (vals, 0, i);
6029 if (!(CONST_INT_P (x)
6030 || GET_CODE (x) == CONST_DOUBLE
6031 || GET_CODE (x) == CONST_FIXED))
6033 if (!register_operand (x, GET_MODE (x)))
6034 x = force_reg (GET_MODE (x), x);
6035 insert_operands[1] = x;
6036 insert_operands[3] = GEN_INT (i);
6037 spu_builtin_insert (insert_operands);
6043 /* Return insn index for the vector compare instruction for given CODE,
6044 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6046 static int
6047 get_vec_cmp_insn (enum rtx_code code,
6048 enum machine_mode dest_mode,
6049 enum machine_mode op_mode)
6052 switch (code)
6054 case EQ:
6055 if (dest_mode == V16QImode && op_mode == V16QImode)
6056 return CODE_FOR_ceq_v16qi;
6057 if (dest_mode == V8HImode && op_mode == V8HImode)
6058 return CODE_FOR_ceq_v8hi;
6059 if (dest_mode == V4SImode && op_mode == V4SImode)
6060 return CODE_FOR_ceq_v4si;
6061 if (dest_mode == V4SImode && op_mode == V4SFmode)
6062 return CODE_FOR_ceq_v4sf;
6063 if (dest_mode == V2DImode && op_mode == V2DFmode)
6064 return CODE_FOR_ceq_v2df;
6065 break;
6066 case GT:
6067 if (dest_mode == V16QImode && op_mode == V16QImode)
6068 return CODE_FOR_cgt_v16qi;
6069 if (dest_mode == V8HImode && op_mode == V8HImode)
6070 return CODE_FOR_cgt_v8hi;
6071 if (dest_mode == V4SImode && op_mode == V4SImode)
6072 return CODE_FOR_cgt_v4si;
6073 if (dest_mode == V4SImode && op_mode == V4SFmode)
6074 return CODE_FOR_cgt_v4sf;
6075 if (dest_mode == V2DImode && op_mode == V2DFmode)
6076 return CODE_FOR_cgt_v2df;
6077 break;
6078 case GTU:
6079 if (dest_mode == V16QImode && op_mode == V16QImode)
6080 return CODE_FOR_clgt_v16qi;
6081 if (dest_mode == V8HImode && op_mode == V8HImode)
6082 return CODE_FOR_clgt_v8hi;
6083 if (dest_mode == V4SImode && op_mode == V4SImode)
6084 return CODE_FOR_clgt_v4si;
6085 break;
6086 default:
6087 break;
6089 return -1;
6092 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6093 DMODE is expected destination mode. This is a recursive function. */
6095 static rtx
6096 spu_emit_vector_compare (enum rtx_code rcode,
6097 rtx op0, rtx op1,
6098 enum machine_mode dmode)
6100 int vec_cmp_insn;
6101 rtx mask;
6102 enum machine_mode dest_mode;
6103 enum machine_mode op_mode = GET_MODE (op1);
6105 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6107 /* Single-precision floating-point vector compare instructions use V4SImode
6108 as the destination; double-precision ones use V2DImode.  The result is
6109 moved to the appropriate destination mode later.  */
6110 if (dmode == V4SFmode)
6111 dest_mode = V4SImode;
6112 else if (dmode == V2DFmode)
6113 dest_mode = V2DImode;
6114 else
6115 dest_mode = dmode;
6117 mask = gen_reg_rtx (dest_mode);
6118 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6120 if (vec_cmp_insn == -1)
6122 bool swap_operands = false;
6123 bool try_again = false;
6124 switch (rcode)
6126 case LT:
6127 rcode = GT;
6128 swap_operands = true;
6129 try_again = true;
6130 break;
6131 case LTU:
6132 rcode = GTU;
6133 swap_operands = true;
6134 try_again = true;
6135 break;
6136 case NE:
6137 case UNEQ:
6138 case UNLE:
6139 case UNLT:
6140 case UNGE:
6141 case UNGT:
6142 case UNORDERED:
6143 /* Treat A != B as ~(A==B). */
6145 enum rtx_code rev_code;
6146 enum insn_code nor_code;
6147 rtx rev_mask;
6149 rev_code = reverse_condition_maybe_unordered (rcode);
6150 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6152 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6153 gcc_assert (nor_code != CODE_FOR_nothing);
6154 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6155 if (dmode != dest_mode)
6157 rtx temp = gen_reg_rtx (dest_mode);
6158 convert_move (temp, mask, 0);
6159 return temp;
6161 return mask;
6163 break;
6164 case GE:
6165 case GEU:
6166 case LE:
6167 case LEU:
6168 /* Try GT/GTU/LT/LTU OR EQ */
6170 rtx c_rtx, eq_rtx;
6171 enum insn_code ior_code;
6172 enum rtx_code new_code;
6174 switch (rcode)
6176 case GE: new_code = GT; break;
6177 case GEU: new_code = GTU; break;
6178 case LE: new_code = LT; break;
6179 case LEU: new_code = LTU; break;
6180 default:
6181 gcc_unreachable ();
6184 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6185 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6187 ior_code = optab_handler (ior_optab, dest_mode);
6188 gcc_assert (ior_code != CODE_FOR_nothing);
6189 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6190 if (dmode != dest_mode)
6192 rtx temp = gen_reg_rtx (dest_mode);
6193 convert_move (temp, mask, 0);
6194 return temp;
6196 return mask;
6198 break;
6199 case LTGT:
6200 /* Try LT OR GT */
6202 rtx lt_rtx, gt_rtx;
6203 enum insn_code ior_code;
6205 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6206 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6208 ior_code = optab_handler (ior_optab, dest_mode);
6209 gcc_assert (ior_code != CODE_FOR_nothing);
6210 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6211 if (dmode != dest_mode)
6213 rtx temp = gen_reg_rtx (dest_mode);
6214 convert_move (temp, mask, 0);
6215 return temp;
6217 return mask;
6219 break;
6220 case ORDERED:
6221 /* Implement as (A==A) & (B==B) */
6223 rtx a_rtx, b_rtx;
6224 enum insn_code and_code;
6226 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6227 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6229 and_code = optab_handler (and_optab, dest_mode);
6230 gcc_assert (and_code != CODE_FOR_nothing);
6231 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6232 if (dmode != dest_mode)
6234 rtx temp = gen_reg_rtx (dest_mode);
6235 convert_move (temp, mask, 0);
6236 return temp;
6238 return mask;
6240 break;
6241 default:
6242 gcc_unreachable ();
6245 /* You only get two chances. */
6246 if (try_again)
6247 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6249 gcc_assert (vec_cmp_insn != -1);
6251 if (swap_operands)
6253 rtx tmp;
6254 tmp = op0;
6255 op0 = op1;
6256 op1 = tmp;
6260 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6261 if (dmode != dest_mode)
6263 rtx temp = gen_reg_rtx (dest_mode);
6264 convert_move (temp, mask, 0);
6265 return temp;
6267 return mask;
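/* A rough sketch of how the codes with no direct instruction are
   synthesized above (A and B are hypothetical vector operands):

       LT   A,B   ->  GT  B,A                 (operands swapped)
       LE   A,B   ->  (LT A,B) | (EQ A,B)     (ior_optab)
       NE   A,B   ->  ~(EQ A,B)               (one_cmpl_optab)
       LTGT A,B   ->  (LT A,B) | (GT A,B)
       ORDERED A,B -> (EQ A,A) & (EQ B,B)     (NaNs compare unequal)

   Each right-hand side compare is itself emitted through a recursive
   call to spu_emit_vector_compare.  */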
6271 /* Emit a vector conditional expression.
6272 DEST is the destination.  OP1 and OP2 are the two VEC_COND_EXPR operands.
6273 CC_OP0 and CC_OP1 are the two operands of the relational operation COND. */
6276 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6277 rtx cond, rtx cc_op0, rtx cc_op1)
6279 enum machine_mode dest_mode = GET_MODE (dest);
6280 enum rtx_code rcode = GET_CODE (cond);
6281 rtx mask;
6283 /* Get the vector mask for the given relational operation. */
6284 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6286 emit_insn (gen_selb (dest, op2, op1, mask));
6288 return 1;
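/* A minimal sketch of the mapping above: a vector conditional such as

       dest = (cc_op0 > cc_op1) ? op1 : op2;    (element-wise)

   becomes a compare that produces an all-ones/all-zeros mask per
   element, followed by selb.  selb selects bits from its third operand
   where the mask bit is 1 and from its second operand where it is 0,
   which is why the call above is gen_selb (dest, op2, op1, mask).  */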
6291 static rtx
6292 spu_force_reg (enum machine_mode mode, rtx op)
6294 rtx x, r;
6295 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6297 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6298 || GET_MODE (op) == BLKmode)
6299 return force_reg (mode, convert_to_mode (mode, op, 0));
6300 abort ();
6303 r = force_reg (GET_MODE (op), op);
6304 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6306 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6307 if (x)
6308 return x;
6311 x = gen_reg_rtx (mode);
6312 emit_insn (gen_spu_convert (x, r));
6313 return x;
6316 static void
6317 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6319 HOST_WIDE_INT v = 0;
6320 int lsbits;
6321 /* Check the range of immediate operands. */
6322 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6324 int range = p - SPU_BTI_7;
6326 if (!CONSTANT_P (op))
6327 error ("%s expects an integer literal in the range [%d, %d]",
6328 d->name,
6329 spu_builtin_range[range].low, spu_builtin_range[range].high);
6331 if (GET_CODE (op) == CONST
6332 && (GET_CODE (XEXP (op, 0)) == PLUS
6333 || GET_CODE (XEXP (op, 0)) == MINUS))
6335 v = INTVAL (XEXP (XEXP (op, 0), 1));
6336 op = XEXP (XEXP (op, 0), 0);
6338 else if (GET_CODE (op) == CONST_INT)
6339 v = INTVAL (op);
6340 else if (GET_CODE (op) == CONST_VECTOR
6341 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6342 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6344 /* The default for v is 0, which is valid in every range. */
6345 if (v < spu_builtin_range[range].low
6346 || v > spu_builtin_range[range].high)
6347 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6348 d->name,
6349 spu_builtin_range[range].low, spu_builtin_range[range].high, v);
6352 switch (p)
6354 case SPU_BTI_S10_4:
6355 lsbits = 4;
6356 break;
6357 case SPU_BTI_U16_2:
6358 /* This is only used in lqa and stqa.  Even though the insns
6359 encode 16 bits of the address (all but the 2 least
6360 significant), only 14 bits are used because the address is
6361 masked to be 16-byte aligned. */
6362 lsbits = 4;
6363 break;
6364 case SPU_BTI_S16_2:
6365 /* This is used for lqr and stqr. */
6366 lsbits = 2;
6367 break;
6368 default:
6369 lsbits = 0;
6372 if (GET_CODE (op) == LABEL_REF
6373 || (GET_CODE (op) == SYMBOL_REF
6374 && SYMBOL_REF_FUNCTION_P (op))
6375 || (v & ((1 << lsbits) - 1)) != 0)
6376 warning (0, "%d least significant bits of %s are ignored", lsbits,
6377 d->name);
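/* Example of the checks above (hypothetical calls, assuming the usual
   10-bit signed immediate range for SPU_BTI_S10):

       si_ai (a, 100);    accepted, 100 is within [-512, 511]
       si_ai (a, 700);    rejected with the range error above

   The least-significant-bits warning fires for e.g. an odd literal
   passed to an intrinsic whose insn ignores the low bits, such as the
   lqr/stqr forms handled by SPU_BTI_S16_2.  */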
6382 static int
6383 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6384 rtx target, rtx ops[])
6386 enum insn_code icode = (enum insn_code) d->icode;
6387 int i = 0, a;
6389 /* Expand the arguments into rtl. */
6391 if (d->parm[0] != SPU_BTI_VOID)
6392 ops[i++] = target;
6394 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6396 tree arg = CALL_EXPR_ARG (exp, a);
6397 if (arg == 0)
6398 abort ();
6399 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6402 gcc_assert (i == insn_data[icode].n_generator_args);
6403 return i;
6406 static rtx
6407 spu_expand_builtin_1 (struct spu_builtin_description *d,
6408 tree exp, rtx target)
6410 rtx pat;
6411 rtx ops[8];
6412 enum insn_code icode = (enum insn_code) d->icode;
6413 enum machine_mode mode, tmode;
6414 int i, p;
6415 int n_operands;
6416 tree return_type;
6418 /* Set up ops[] with values from arglist. */
6419 n_operands = expand_builtin_args (d, exp, target, ops);
6421 /* Handle the target operand which must be operand 0. */
6422 i = 0;
6423 if (d->parm[0] != SPU_BTI_VOID)
6426 /* We prefer the mode specified for the match_operand; otherwise we
6427 use the mode from the builtin function prototype. */
6428 tmode = insn_data[d->icode].operand[0].mode;
6429 if (tmode == VOIDmode)
6430 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6432 /* Try to use TARGET, because not using it can lead to extra copies,
6433 and when all of the registers are in use, extra copies lead to
6434 extra spills. */
6435 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6436 ops[0] = target;
6437 else
6438 target = ops[0] = gen_reg_rtx (tmode);
6440 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6441 abort ();
6443 i++;
6446 if (d->fcode == SPU_MASK_FOR_LOAD)
6448 enum machine_mode mode = insn_data[icode].operand[1].mode;
6449 tree arg;
6450 rtx addr, op, pat;
6452 /* Get the address. */
6453 arg = CALL_EXPR_ARG (exp, 0);
6454 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6455 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6456 addr = memory_address (mode, op);
6458 /* Negate the address. */
6459 op = gen_reg_rtx (GET_MODE (addr));
6460 emit_insn (gen_rtx_SET (VOIDmode, op,
6461 gen_rtx_NEG (GET_MODE (addr), addr)));
6462 op = gen_rtx_MEM (mode, op);
6464 pat = GEN_FCN (icode) (target, op);
6465 if (!pat)
6466 return 0;
6467 emit_insn (pat);
6468 return target;
6471 /* Ignore align_hint, but still expand its args in case they have
6472 side effects. */
6473 if (icode == CODE_FOR_spu_align_hint)
6474 return 0;
6476 /* Handle the rest of the operands. */
6477 for (p = 1; i < n_operands; i++, p++)
6479 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6480 mode = insn_data[d->icode].operand[i].mode;
6481 else
6482 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6484 /* MODE can be VOIDmode here for labels. */
6486 /* For specific intrinsics with an immediate operand, e.g.,
6487 si_ai(), we sometimes need to convert the scalar argument to a
6488 vector argument by splatting the scalar. */
6489 if (VECTOR_MODE_P (mode)
6490 && (GET_CODE (ops[i]) == CONST_INT
6491 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6492 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6494 if (GET_CODE (ops[i]) == CONST_INT)
6495 ops[i] = spu_const (mode, INTVAL (ops[i]));
6496 else
6498 rtx reg = gen_reg_rtx (mode);
6499 enum machine_mode imode = GET_MODE_INNER (mode);
6500 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6501 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6502 if (imode != GET_MODE (ops[i]))
6503 ops[i] = convert_to_mode (imode, ops[i],
6504 TYPE_UNSIGNED (spu_builtin_types
6505 [d->parm[i]]));
6506 emit_insn (gen_spu_splats (reg, ops[i]));
6507 ops[i] = reg;
6511 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6513 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6514 ops[i] = spu_force_reg (mode, ops[i]);
6517 switch (n_operands)
6519 case 0:
6520 pat = GEN_FCN (icode) (0);
6521 break;
6522 case 1:
6523 pat = GEN_FCN (icode) (ops[0]);
6524 break;
6525 case 2:
6526 pat = GEN_FCN (icode) (ops[0], ops[1]);
6527 break;
6528 case 3:
6529 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6530 break;
6531 case 4:
6532 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6533 break;
6534 case 5:
6535 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6536 break;
6537 case 6:
6538 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6539 break;
6540 default:
6541 abort ();
6544 if (!pat)
6545 abort ();
6547 if (d->type == B_CALL || d->type == B_BISLED)
6548 emit_call_insn (pat);
6549 else if (d->type == B_JUMP)
6551 emit_jump_insn (pat);
6552 emit_barrier ();
6554 else
6555 emit_insn (pat);
6557 return_type = spu_builtin_types[d->parm[0]];
6558 if (d->parm[0] != SPU_BTI_VOID
6559 && GET_MODE (target) != TYPE_MODE (return_type))
6561 /* TARGET is the return value.  It should always have the mode of
6562 the builtin function prototype. */
6563 target = spu_force_reg (TYPE_MODE (return_type), target);
6566 return target;
6570 spu_expand_builtin (tree exp,
6571 rtx target,
6572 rtx subtarget ATTRIBUTE_UNUSED,
6573 enum machine_mode mode ATTRIBUTE_UNUSED,
6574 int ignore ATTRIBUTE_UNUSED)
6576 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6577 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6578 struct spu_builtin_description *d;
6580 if (fcode < NUM_SPU_BUILTINS)
6582 d = &spu_builtins[fcode];
6584 return spu_expand_builtin_1 (d, exp, target);
6586 abort ();
6589 /* Implement targetm.vectorize.builtin_mask_for_load. */
6590 static tree
6591 spu_builtin_mask_for_load (void)
6593 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6596 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6597 static int
6598 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6599 tree vectype,
6600 int misalign ATTRIBUTE_UNUSED)
6602 unsigned elements;
6604 switch (type_of_cost)
6606 case scalar_stmt:
6607 case vector_stmt:
6608 case vector_load:
6609 case vector_store:
6610 case vec_to_scalar:
6611 case scalar_to_vec:
6612 case cond_branch_not_taken:
6613 case vec_perm:
6614 case vec_promote_demote:
6615 return 1;
6617 case scalar_store:
6618 return 10;
6620 case scalar_load:
6621 /* Load + rotate. */
6622 return 2;
6624 case unaligned_load:
6625 return 2;
6627 case cond_branch_taken:
6628 return 6;
6630 case vec_construct:
6631 elements = TYPE_VECTOR_SUBPARTS (vectype);
6632 return elements / 2 + 1;
6634 default:
6635 gcc_unreachable ();
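/* Worked example of the table above: constructing a V4SImode vector
   from scalars has TYPE_VECTOR_SUBPARTS == 4, so vec_construct costs
   4 / 2 + 1 = 3.  A scalar load costs 2 because the SPU only loads
   whole quadwords, so the value must be loaded and then rotated into
   the preferred slot; a scalar store costs 10 because it needs a
   read-modify-write of the whole quadword.  */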
6639 /* Implement targetm.vectorize.init_cost. */
6641 static void *
6642 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6644 unsigned *cost = XNEWVEC (unsigned, 3);
6645 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6646 return cost;
6649 /* Implement targetm.vectorize.add_stmt_cost. */
6651 static unsigned
6652 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6653 struct _stmt_vec_info *stmt_info, int misalign,
6654 enum vect_cost_model_location where)
6656 unsigned *cost = (unsigned *) data;
6657 unsigned retval = 0;
6659 if (flag_vect_cost_model)
6661 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6662 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6664 /* Statements in an inner loop relative to the loop being
6665 vectorized are weighted more heavily. The value here is
6666 arbitrary and could potentially be improved with analysis. */
6667 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6668 count *= 50; /* FIXME. */
6670 retval = (unsigned) (count * stmt_cost);
6671 cost[where] += retval;
6674 return retval;
6677 /* Implement targetm.vectorize.finish_cost. */
6679 static void
6680 spu_finish_cost (void *data, unsigned *prologue_cost,
6681 unsigned *body_cost, unsigned *epilogue_cost)
6683 unsigned *cost = (unsigned *) data;
6684 *prologue_cost = cost[vect_prologue];
6685 *body_cost = cost[vect_body];
6686 *epilogue_cost = cost[vect_epilogue];
6689 /* Implement targetm.vectorize.destroy_cost_data. */
6691 static void
6692 spu_destroy_cost_data (void *data)
6694 free (data);
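/* The four hooks above implement the vectorizer cost interface with a
   plain three-element accumulator array.  A minimal usage sketch with
   hypothetical counts (assuming flag_vect_cost_model is enabled):

       unsigned pro, body, epi;
       unsigned *data = (unsigned *) spu_init_cost (NULL);
       spu_add_stmt_cost (data, 4, vector_load, NULL, 0, vect_body);
       spu_finish_cost (data, &pro, &body, &epi);
       spu_destroy_cost_data (data);

   after which body == 4 * 1 and the other two counters are 0.  */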
6697 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6698 after applying N iterations.  This routine does not determine
6699 how many iterations are required to reach the desired alignment. */
6701 static bool
6702 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6704 if (is_packed)
6705 return false;
6707 /* All other types are naturally aligned. */
6708 return true;
6711 /* Return the appropriate mode for a named address pointer. */
6712 static enum machine_mode
6713 spu_addr_space_pointer_mode (addr_space_t addrspace)
6715 switch (addrspace)
6717 case ADDR_SPACE_GENERIC:
6718 return ptr_mode;
6719 case ADDR_SPACE_EA:
6720 return EAmode;
6721 default:
6722 gcc_unreachable ();
6726 /* Return the appropriate mode for an address in a named address space. */
6727 static enum machine_mode
6728 spu_addr_space_address_mode (addr_space_t addrspace)
6730 switch (addrspace)
6732 case ADDR_SPACE_GENERIC:
6733 return Pmode;
6734 case ADDR_SPACE_EA:
6735 return EAmode;
6736 default:
6737 gcc_unreachable ();
6741 /* Determine if one named address space is a subset of another. */
6743 static bool
6744 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6746 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6747 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6749 if (subset == superset)
6750 return true;
6752 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6753 being subsets but instead as disjoint address spaces. */
6754 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6755 return false;
6757 else
6758 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6761 /* Convert from one address space to another. */
6762 static rtx
6763 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6765 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6766 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6768 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6769 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6771 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6773 rtx result, ls;
6775 ls = gen_const_mem (DImode,
6776 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6777 set_mem_align (ls, 128);
6779 result = gen_reg_rtx (Pmode);
6780 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6781 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6782 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6783 ls, const0_rtx, Pmode, 1);
6785 emit_insn (gen_subsi3 (result, op, ls));
6787 return result;
6790 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6792 rtx result, ls;
6794 ls = gen_const_mem (DImode,
6795 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6796 set_mem_align (ls, 128);
6798 result = gen_reg_rtx (EAmode);
6799 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6800 op = force_reg (Pmode, op);
6801 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6802 ls, const0_rtx, EAmode, 1);
6803 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6805 if (EAmode == SImode)
6806 emit_insn (gen_addsi3 (result, op, ls));
6807 else
6808 emit_insn (gen_adddi3 (result, op, ls));
6810 return result;
6813 else
6814 gcc_unreachable ();
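/* A source-level view of the conversions above (illustrative, using
   the __ea named address space extension):

       extern __ea int *ep;
       int *lp = (int *) ep;            subtracts the __ea_local_store
                                        base to get a local-store pointer
       __ea int *bp = (__ea int *) lp;  adds the base back

   The conditional moves ensure that a null pointer stays null in both
   directions.  */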
6818 /* Count the total number of instructions in each pipe and return the
6819 maximum, which is used as the Minimum Iteration Interval (MII)
6820 in the modulo scheduler.  get_pipe () returns -2, -1, 0, or 1;
6821 -2 denotes instructions that can go in either pipe0 or pipe1. */
6822 static int
6823 spu_sms_res_mii (struct ddg *g)
6825 int i;
6826 unsigned t[4] = {0, 0, 0, 0};
6828 for (i = 0; i < g->num_nodes; i++)
6830 rtx insn = g->nodes[i].insn;
6831 int p = get_pipe (insn) + 2;
6833 gcc_assert (p >= 0);
6834 gcc_assert (p < 4);
6836 t[p]++;
6837 if (dump_file && INSN_P (insn))
6838 fprintf (dump_file, "i%d %s %d %d\n",
6839 INSN_UID (insn),
6840 insn_data[INSN_CODE(insn)].name,
6841 p, t[p]);
6843 if (dump_file)
6844 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6846 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
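/* Hypothetical example of the computation above: with pipe counts
   t = { 2, 0, 3, 1 } (indexed by get_pipe () result + 2) the MII is

       MAX ((2 + 3 + 1 + 1) / 2, MAX (3, 1)) == MAX (3, 3) == 3

   so the modulo scheduler will not try an initiation interval below
   three cycles for this loop.  */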
6850 void
6851 spu_init_expanders (void)
6853 if (cfun)
6855 rtx r0, r1;
6856 /* The hard frame pointer register is only 128-bit aligned when
6857 frame_pointer_needed is true. We don't know that until we're
6858 expanding the prologue. */
6859 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6861 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6862 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6863 to be treated as aligned, so generate them here. */
6864 r0 = gen_reg_rtx (SImode);
6865 r1 = gen_reg_rtx (SImode);
6866 mark_reg_pointer (r0, 128);
6867 mark_reg_pointer (r1, 128);
6868 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6869 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6873 static enum machine_mode
6874 spu_libgcc_cmp_return_mode (void)
6877 /* For SPU, word mode is TImode, so it is better to use SImode
6878 for compare returns. */
6879 return SImode;
6882 static enum machine_mode
6883 spu_libgcc_shift_count_mode (void)
6885 /* For SPU, word mode is TImode, so it is better to use SImode
6886 for shift counts. */
6887 return SImode;
6890 /* Implement targetm.section_type_flags. */
6891 static unsigned int
6892 spu_section_type_flags (tree decl, const char *name, int reloc)
6894 /* .toe needs to have type @nobits. */
6895 if (strcmp (name, ".toe") == 0)
6896 return SECTION_BSS;
6897 /* Don't load _ea into the current address space. */
6898 if (strcmp (name, "._ea") == 0)
6899 return SECTION_WRITE | SECTION_DEBUG;
6900 return default_section_type_flags (decl, name, reloc);
6903 /* Implement targetm.select_section. */
6904 static section *
6905 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6907 /* Variables and constants defined in the __ea address space
6908 go into a special section named "._ea". */
6909 if (TREE_TYPE (decl) != error_mark_node
6910 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6912 /* We might get called with string constants, but get_named_section
6913 doesn't like them as they are not DECLs. Also, we need to set
6914 flags in that case. */
6915 if (!DECL_P (decl))
6916 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6918 return get_named_section (decl, "._ea", reloc);
6921 return default_elf_select_section (decl, reloc, align);
6924 /* Implement targetm.unique_section. */
6925 static void
6926 spu_unique_section (tree decl, int reloc)
6928 /* We don't support unique section names in the __ea address
6929 space for now. */
6930 if (TREE_TYPE (decl) != error_mark_node
6931 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6932 return;
6934 default_unique_section (decl, reloc);
6937 /* Generate a constant or register which contains 2^SCALE. We assume
6938 the result is valid for MODE. Currently, MODE must be V4SFmode and
6939 SCALE must be SImode. */
6941 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6943 gcc_assert (mode == V4SFmode);
6944 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6945 if (GET_CODE (scale) != CONST_INT)
6947 /* unsigned int exp = (127 + scale) << 23;
6948 __vector float m = (__vector float) spu_splats (exp); */
6949 rtx reg = force_reg (SImode, scale);
6950 rtx exp = gen_reg_rtx (SImode);
6951 rtx mul = gen_reg_rtx (mode);
6952 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6953 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6954 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6955 return mul;
6957 else
6959 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6960 unsigned char arr[16];
6961 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6962 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6963 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6964 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6965 return array_to_constant (mode, arr);
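/* Worked example of the constant path above: for SCALE == 1 the biased
   exponent is 127 + 1 == 128, so per 4-byte element

       arr[0] = 128 >> 1          == 0x40
       arr[1] = (128 << 7) & 0xff == 0x00    (truncated to a byte)
       arr[2] = arr[3]            == 0x00

   giving the word 0x40000000, which is 2.0f in IEEE single precision;
   the result is the vector { 2.0f, 2.0f, 2.0f, 2.0f } == 2^1.  */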
6969 /* After reload, just change the convert into a move instruction
6970 or a dead instruction. */
6971 void
6972 spu_split_convert (rtx ops[])
6974 if (REGNO (ops[0]) == REGNO (ops[1]))
6975 emit_note (NOTE_INSN_DELETED);
6976 else
6978 /* Use TImode always as this might help hard reg copyprop. */
6979 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6980 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6981 emit_insn (gen_move_insn (op0, op1));
6985 void
6986 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6988 fprintf (file, "# profile\n");
6989 fprintf (file, "brsl $75, _mcount\n");
6992 /* Implement targetm.ref_may_alias_errno. */
6993 static bool
6994 spu_ref_may_alias_errno (ao_ref *ref)
6996 tree base = ao_ref_base (ref);
6998 /* With SPU newlib, errno is defined as something like
6999 _impure_data._errno
7000 The default implementation of this target macro does not
7001 recognize such expressions, so special-case it here. */
7003 if (TREE_CODE (base) == VAR_DECL
7004 && !TREE_STATIC (base)
7005 && DECL_EXTERNAL (base)
7006 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7007 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7008 "_impure_data") == 0
7009 /* _errno is the first member of _impure_data. */
7010 && ref->offset == 0)
7011 return true;
7013 return default_ref_may_alias_errno (ref);
7016 /* Output thunk to FILE that implements a C++ virtual function call (with
7017 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7018 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7019 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7020 relative to the resulting this pointer. */
7022 static void
7023 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7024 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7025 tree function)
7027 rtx op[8];
7029 /* Make sure unwind info is emitted for the thunk if needed. */
7030 final_start_function (emit_barrier (), file, 1);
7032 /* Operand 0 is the target function. */
7033 op[0] = XEXP (DECL_RTL (function), 0);
7035 /* Operand 1 is the 'this' pointer. */
7036 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7037 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7038 else
7039 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7041 /* Operands 2/3 are the low/high halfwords of delta. */
7042 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7043 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7045 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7046 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7047 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7049 /* Operands 6/7 are temporary registers. */
7050 op[6] = gen_rtx_REG (Pmode, 79);
7051 op[7] = gen_rtx_REG (Pmode, 78);
7053 /* Add DELTA to this pointer. */
7054 if (delta)
7056 if (delta >= -0x200 && delta < 0x200)
7057 output_asm_insn ("ai\t%1,%1,%2", op);
7058 else if (delta >= -0x8000 && delta < 0x8000)
7060 output_asm_insn ("il\t%6,%2", op);
7061 output_asm_insn ("a\t%1,%1,%6", op);
7063 else
7065 output_asm_insn ("ilhu\t%6,%3", op);
7066 output_asm_insn ("iohl\t%6,%2", op);
7067 output_asm_insn ("a\t%1,%1,%6", op);
7071 /* Perform vcall adjustment. */
7072 if (vcall_offset)
7074 output_asm_insn ("lqd\t%7,0(%1)", op);
7075 output_asm_insn ("rotqby\t%7,%7,%1", op);
7077 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7078 output_asm_insn ("ai\t%7,%7,%4", op);
7079 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7081 output_asm_insn ("il\t%6,%4", op);
7082 output_asm_insn ("a\t%7,%7,%6", op);
7084 else
7086 output_asm_insn ("ilhu\t%6,%5", op);
7087 output_asm_insn ("iohl\t%6,%4", op);
7088 output_asm_insn ("a\t%7,%7,%6", op);
7091 output_asm_insn ("lqd\t%6,0(%7)", op);
7092 output_asm_insn ("rotqby\t%6,%6,%7", op);
7093 output_asm_insn ("a\t%1,%1,%6", op);
7096 /* Jump to target. */
7097 output_asm_insn ("br\t%0", op);
7099 final_end_function ();
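/* For illustration, a thunk with DELTA == 16 and VCALL_OFFSET == 0
   (register numbers as set up above, target name hypothetical) is
   emitted roughly as

       ai      $3,$3,16         adjust the 'this' pointer
       br      _ZN1B3fooEv      tail-jump to the real function

   Larger deltas go through il or ilhu/iohl into the temporary $79
   followed by an add, and a nonzero VCALL_OFFSET adds the two quadword
   loads and rotates shown above.  */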
7102 /* Canonicalize a comparison from one we don't have to one we do have. */
7103 static void
7104 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7105 bool op0_preserve_value)
7107 if (!op0_preserve_value
7108 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7110 rtx tem = *op0;
7111 *op0 = *op1;
7112 *op1 = tem;
7113 *code = (int)swap_condition ((enum rtx_code)*code);
7117 /* Table of machine attributes. */
7118 static const struct attribute_spec spu_attribute_table[] =
7120 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7121 affects_type_identity } */
7122 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7123 false },
7124 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7125 false },
7126 { NULL, 0, 0, false, false, false, NULL, false }
7129 /* TARGET overrides. */
7131 #undef TARGET_ADDR_SPACE_POINTER_MODE
7132 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7134 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7135 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7137 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7138 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7139 spu_addr_space_legitimate_address_p
7141 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7142 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7144 #undef TARGET_ADDR_SPACE_SUBSET_P
7145 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7147 #undef TARGET_ADDR_SPACE_CONVERT
7148 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7150 #undef TARGET_INIT_BUILTINS
7151 #define TARGET_INIT_BUILTINS spu_init_builtins
7152 #undef TARGET_BUILTIN_DECL
7153 #define TARGET_BUILTIN_DECL spu_builtin_decl
7155 #undef TARGET_EXPAND_BUILTIN
7156 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7158 #undef TARGET_UNWIND_WORD_MODE
7159 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7161 #undef TARGET_LEGITIMIZE_ADDRESS
7162 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7164 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7165 and .quad for the debugger. When it is known that the assembler is fixed,
7166 these can be removed. */
7167 #undef TARGET_ASM_UNALIGNED_SI_OP
7168 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7170 #undef TARGET_ASM_ALIGNED_DI_OP
7171 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7173 /* The .8byte directive doesn't seem to work well for a 32-bit
7174 architecture. */
7175 #undef TARGET_ASM_UNALIGNED_DI_OP
7176 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7178 #undef TARGET_RTX_COSTS
7179 #define TARGET_RTX_COSTS spu_rtx_costs
7181 #undef TARGET_ADDRESS_COST
7182 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7184 #undef TARGET_SCHED_ISSUE_RATE
7185 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7187 #undef TARGET_SCHED_INIT_GLOBAL
7188 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7190 #undef TARGET_SCHED_INIT
7191 #define TARGET_SCHED_INIT spu_sched_init
7193 #undef TARGET_SCHED_VARIABLE_ISSUE
7194 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7196 #undef TARGET_SCHED_REORDER
7197 #define TARGET_SCHED_REORDER spu_sched_reorder
7199 #undef TARGET_SCHED_REORDER2
7200 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7202 #undef TARGET_SCHED_ADJUST_COST
7203 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7205 #undef TARGET_ATTRIBUTE_TABLE
7206 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7208 #undef TARGET_ASM_INTEGER
7209 #define TARGET_ASM_INTEGER spu_assemble_integer
7211 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7212 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7214 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7215 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7217 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7218 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7220 #undef TARGET_ASM_GLOBALIZE_LABEL
7221 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7223 #undef TARGET_PASS_BY_REFERENCE
7224 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7226 #undef TARGET_FUNCTION_ARG
7227 #define TARGET_FUNCTION_ARG spu_function_arg
7229 #undef TARGET_FUNCTION_ARG_ADVANCE
7230 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7232 #undef TARGET_MUST_PASS_IN_STACK
7233 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7235 #undef TARGET_BUILD_BUILTIN_VA_LIST
7236 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7238 #undef TARGET_EXPAND_BUILTIN_VA_START
7239 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7241 #undef TARGET_SETUP_INCOMING_VARARGS
7242 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7244 #undef TARGET_MACHINE_DEPENDENT_REORG
7245 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7247 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7248 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7250 #undef TARGET_INIT_LIBFUNCS
7251 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7253 #undef TARGET_RETURN_IN_MEMORY
7254 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7256 #undef TARGET_ENCODE_SECTION_INFO
7257 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7259 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7260 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7262 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7263 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7265 #undef TARGET_VECTORIZE_INIT_COST
7266 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7268 #undef TARGET_VECTORIZE_ADD_STMT_COST
7269 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7271 #undef TARGET_VECTORIZE_FINISH_COST
7272 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7274 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7275 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7277 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7278 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7280 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7281 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7283 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7284 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7286 #undef TARGET_SCHED_SMS_RES_MII
7287 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7289 #undef TARGET_SECTION_TYPE_FLAGS
7290 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7292 #undef TARGET_ASM_SELECT_SECTION
7293 #define TARGET_ASM_SELECT_SECTION spu_select_section
7295 #undef TARGET_ASM_UNIQUE_SECTION
7296 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7298 #undef TARGET_LEGITIMATE_ADDRESS_P
7299 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7301 #undef TARGET_LEGITIMATE_CONSTANT_P
7302 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7304 #undef TARGET_TRAMPOLINE_INIT
7305 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7307 #undef TARGET_WARN_FUNC_RETURN
7308 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7310 #undef TARGET_OPTION_OVERRIDE
7311 #define TARGET_OPTION_OVERRIDE spu_option_override
7313 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7314 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7316 #undef TARGET_REF_MAY_ALIAS_ERRNO
7317 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7319 #undef TARGET_ASM_OUTPUT_MI_THUNK
7320 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7321 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7322 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7324 /* Variable tracking should be run after all optimizations which
7325 change the order of insns.  It also needs a valid CFG. */
7326 #undef TARGET_DELAY_VARTRACK
7327 #define TARGET_DELAY_VARTRACK true
7329 #undef TARGET_CANONICALIZE_COMPARISON
7330 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7332 #undef TARGET_CAN_USE_DOLOOP_P
7333 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7335 struct gcc_target targetm = TARGET_INITIALIZER;
7337 #include "gt-spu.h"