[2/77] Add an E_ prefix to case statements
[official-gcc.git] / gcc / config / spu / spu.c
blob 72169374668bc700f2038f86f4bb1ec81298125d
1 /* Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "target.h"
22 #include "rtl.h"
23 #include "tree.h"
24 #include "gimple.h"
25 #include "cfghooks.h"
26 #include "cfgloop.h"
27 #include "df.h"
28 #include "memmodel.h"
29 #include "tm_p.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "alias.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "output.h"
47 #include "cfgrtl.h"
48 #include "cfgbuild.h"
49 #include "langhooks.h"
50 #include "reload.h"
51 #include "sched-int.h"
52 #include "params.h"
53 #include "gimplify.h"
54 #include "tm-constrs.h"
55 #include "ddg.h"
56 #include "dumpfile.h"
57 #include "builtins.h"
58 #include "rtl-iter.h"
60 /* This file should be included last. */
61 #include "target-def.h"
63 /* Builtin types, data and prototypes. */
65 enum spu_builtin_type_index
67 SPU_BTI_END_OF_PARAMS,
69 /* We create new type nodes for these. */
70 SPU_BTI_V16QI,
71 SPU_BTI_V8HI,
72 SPU_BTI_V4SI,
73 SPU_BTI_V2DI,
74 SPU_BTI_V4SF,
75 SPU_BTI_V2DF,
76 SPU_BTI_UV16QI,
77 SPU_BTI_UV8HI,
78 SPU_BTI_UV4SI,
79 SPU_BTI_UV2DI,
81 /* A 16-byte type. (Implemented with V16QI_type_node) */
82 SPU_BTI_QUADWORD,
84 /* These all correspond to intSI_type_node */
85 SPU_BTI_7,
86 SPU_BTI_S7,
87 SPU_BTI_U7,
88 SPU_BTI_S10,
89 SPU_BTI_S10_4,
90 SPU_BTI_U14,
91 SPU_BTI_16,
92 SPU_BTI_S16,
93 SPU_BTI_S16_2,
94 SPU_BTI_U16,
95 SPU_BTI_U16_2,
96 SPU_BTI_U18,
98 /* These correspond to the standard types */
99 SPU_BTI_INTQI,
100 SPU_BTI_INTHI,
101 SPU_BTI_INTSI,
102 SPU_BTI_INTDI,
104 SPU_BTI_UINTQI,
105 SPU_BTI_UINTHI,
106 SPU_BTI_UINTSI,
107 SPU_BTI_UINTDI,
109 SPU_BTI_FLOAT,
110 SPU_BTI_DOUBLE,
112 SPU_BTI_VOID,
113 SPU_BTI_PTR,
115 SPU_BTI_MAX
118 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
119 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
120 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
121 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
122 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
123 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
124 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
125 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
126 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
127 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
129 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
131 struct spu_builtin_range
133 int low, high;
136 static struct spu_builtin_range spu_builtin_range[] = {
137 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
138 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
139 {0ll, 0x7fll}, /* SPU_BTI_U7 */
140 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
141 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
142 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
143 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
144 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
145 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
146 {0ll, 0xffffll}, /* SPU_BTI_U16 */
147 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
148 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
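/* For example, SPU_BTI_S10 above is the signed 10-bit range [-512, 511]
   used by immediate forms such as ai, SPU_BTI_S10_4 is the same 10-bit
   field scaled by 16 (giving [-8192, 8191]) as used for quadword d-form
   offsets, and SPU_BTI_U7 is the plain unsigned 7-bit range [0, 127].  */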
152 /* Target specific attribute specifications. */
153 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
155 /* Prototypes and external defs. */
156 static int get_pipe (rtx_insn *insn);
157 static int spu_naked_function_p (tree func);
158 static int mem_is_padded_component_ref (rtx x);
159 static void fix_range (const char *);
160 static rtx spu_expand_load (rtx, rtx, rtx, int);
162 /* Which instruction set architecture to use. */
163 int spu_arch;
164 /* Which cpu are we tuning for. */
165 int spu_tune;
167 /* The hardware requires 8 insns between a hint and the branch it
168 affects. This variable describes how many rtl instructions the
169 compiler needs to see before inserting a hint, and then the compiler
170 will insert enough nops to make it at least 8 insns. The default is
171 for the compiler to allow up to 2 nops to be emitted. The nops are
172 inserted in pairs, so we round down. */
173 int spu_hint_dist = (8*4) - (2*4);
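/* That is, 8 insns x 4 bytes = 32 bytes must separate the hint from the
   branch it targets; allowing the default of 2 nops of padding means the
   compiler only has to see 32 - 2*4 = 24 bytes of real insns, hence
   (8*4) - (2*4).  */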
175 enum spu_immediate {
176 SPU_NONE,
177 SPU_IL,
178 SPU_ILA,
179 SPU_ILH,
180 SPU_ILHU,
181 SPU_ORI,
182 SPU_ORHI,
183 SPU_ORBI,
184 SPU_IOHL
186 enum immediate_class
188 IC_POOL, /* constant pool */
189 IC_IL1, /* one il* instruction */
190 IC_IL2, /* both ilhu and iohl instructions */
191 IC_IL1s, /* one il* instruction */
192 IC_IL2s, /* both ilhu and iohl instructions */
193 IC_FSMBI, /* the fsmbi instruction */
194 IC_CPAT, /* one of the c*d instructions */
195 IC_FSMBI2 /* fsmbi plus 1 other instruction */
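/* For example, 0x12340000 can be built with a single ilhu (IC_IL1),
   while 0x12345678 needs the ilhu/iohl pair (IC_IL2); constants that
   fit none of these patterns end up in the constant pool (IC_POOL).  */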
198 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
199 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
200 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
201 static enum immediate_class classify_immediate (rtx op,
202 machine_mode mode);
204 /* Pointer mode for __ea references. */
205 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
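/* So __ea pointers are DImode (64-bit) by default and only SImode when
   the 32-bit __ea model is selected (spu_ea_model == 32, e.g. with
   -mea32).  */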
208 /* Define the structure for the machine field in struct function. */
209 struct GTY(()) machine_function
211 /* Register to use for PIC accesses. */
212 rtx pic_reg;
215 /* How to allocate a 'struct machine_function'. */
216 static struct machine_function *
217 spu_init_machine_status (void)
219 return ggc_cleared_alloc<machine_function> ();
222 /* Implement TARGET_OPTION_OVERRIDE. */
223 static void
224 spu_option_override (void)
226 /* Set up function hooks. */
227 init_machine_status = spu_init_machine_status;
229 /* Small loops will be unrolled at -O3. For SPU it is more important
230 to keep code small by default. */
231 if (!flag_unroll_loops && !flag_peel_loops)
232 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
233 global_options.x_param_values,
234 global_options_set.x_param_values);
236 flag_omit_frame_pointer = 1;
238 /* Functions must be 8 byte aligned so we correctly handle dual issue */
239 if (align_functions < 8)
240 align_functions = 8;
242 spu_hint_dist = 8*4 - spu_max_nops*4;
243 if (spu_hint_dist < 0)
244 spu_hint_dist = 0;
246 if (spu_fixed_range_string)
247 fix_range (spu_fixed_range_string);
249 /* Determine processor architectural level. */
250 if (spu_arch_string)
252 if (strcmp (&spu_arch_string[0], "cell") == 0)
253 spu_arch = PROCESSOR_CELL;
254 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
255 spu_arch = PROCESSOR_CELLEDP;
256 else
257 error ("bad value (%s) for -march= switch", spu_arch_string);
260 /* Determine processor to tune for. */
261 if (spu_tune_string)
263 if (strcmp (&spu_tune_string[0], "cell") == 0)
264 spu_tune = PROCESSOR_CELL;
265 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
266 spu_tune = PROCESSOR_CELLEDP;
267 else
268 error ("bad value (%s) for -mtune= switch", spu_tune_string);
271 /* Change defaults according to the processor architecture. */
272 if (spu_arch == PROCESSOR_CELLEDP)
274 /* If no command line option has been otherwise specified, change
275 the default to -mno-safe-hints on celledp -- only the original
276 Cell/B.E. processors require this workaround. */
277 if (!(target_flags_explicit & MASK_SAFE_HINTS))
278 target_flags &= ~MASK_SAFE_HINTS;
281 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
284 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
285 struct attribute_spec.handler. */
287 /* True if MODE is valid for the target. By "valid", we mean able to
288 be manipulated in non-trivial ways. In particular, this means all
289 the arithmetic is supported. */
290 static bool
291 spu_scalar_mode_supported_p (machine_mode mode)
293 switch (mode)
295 case E_QImode:
296 case E_HImode:
297 case E_SImode:
298 case E_SFmode:
299 case E_DImode:
300 case E_TImode:
301 case E_DFmode:
302 return true;
304 default:
305 return false;
309 /* Similarly for vector modes. "Supported" here is less strict. At
310 least some operations are supported; need to check optabs or builtins
311 for further details. */
312 static bool
313 spu_vector_mode_supported_p (machine_mode mode)
315 switch (mode)
317 case E_V16QImode:
318 case E_V8HImode:
319 case E_V4SImode:
320 case E_V2DImode:
321 case E_V4SFmode:
322 case E_V2DFmode:
323 return true;
325 default:
326 return false;
330 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
331 least significant bytes of the outer mode. This function returns
332 TRUE for the SUBREGs where this is correct. */
334 valid_subreg (rtx op)
336 machine_mode om = GET_MODE (op);
337 machine_mode im = GET_MODE (SUBREG_REG (op));
338 return om != VOIDmode && im != VOIDmode
339 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
340 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
341 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
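/* For example, (subreg:SI (reg:QI)) is valid (both modes fit within 4
   bytes), as is (subreg:V4SI (reg:TI)) (both are 16 bytes), but
   (subreg:SI (reg:DI)) fails all three size tests and is rejected.  */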
344 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
345 and adjust the start offset. */
346 static rtx
347 adjust_operand (rtx op, HOST_WIDE_INT * start)
349 machine_mode mode;
350 int op_size;
351 /* Strip any paradoxical SUBREG. */
352 if (GET_CODE (op) == SUBREG
353 && (GET_MODE_BITSIZE (GET_MODE (op))
354 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
356 if (start)
357 *start -=
358 GET_MODE_BITSIZE (GET_MODE (op)) -
359 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
360 op = SUBREG_REG (op);
362 /* If it is smaller than SI, widen it to SImode (the SUBREG is added below). */
363 op_size = GET_MODE_BITSIZE (GET_MODE (op));
364 if (op_size < 32)
366 if (start)
367 *start += 32 - op_size;
368 op_size = 32;
370 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
371 mode = mode_for_size (op_size, MODE_INT, 0);
372 if (mode != GET_MODE (op))
373 op = gen_rtx_SUBREG (mode, op, 0);
374 return op;
377 void
378 spu_expand_extv (rtx ops[], int unsignedp)
380 rtx dst = ops[0], src = ops[1];
381 HOST_WIDE_INT width = INTVAL (ops[2]);
382 HOST_WIDE_INT start = INTVAL (ops[3]);
383 HOST_WIDE_INT align_mask;
384 rtx s0, s1, mask, r0;
386 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
388 if (MEM_P (src))
390 /* First, determine if we need 1 TImode load or 2. We need only 1
391 if the bits being extracted do not cross the alignment boundary
392 as determined by the MEM and its address. */
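/* For example, with MEM_ALIGN (src) == 128 the mask is -128, so
   start = 8, width = 32 stays within one aligned quadword (one load),
   while start = 112, width = 32 straddles the boundary (two loads).  */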
394 align_mask = -MEM_ALIGN (src);
395 if ((start & align_mask) == ((start + width - 1) & align_mask))
397 /* Alignment is sufficient for 1 load. */
398 s0 = gen_reg_rtx (TImode);
399 r0 = spu_expand_load (s0, 0, src, start / 8);
400 start &= 7;
401 if (r0)
402 emit_insn (gen_rotqby_ti (s0, s0, r0));
404 else
406 /* Need 2 loads. */
407 s0 = gen_reg_rtx (TImode);
408 s1 = gen_reg_rtx (TImode);
409 r0 = spu_expand_load (s0, s1, src, start / 8);
410 start &= 7;
412 gcc_assert (start + width <= 128);
413 if (r0)
415 rtx r1 = gen_reg_rtx (SImode);
416 mask = gen_reg_rtx (TImode);
417 emit_move_insn (mask, GEN_INT (-1));
418 emit_insn (gen_rotqby_ti (s0, s0, r0));
419 emit_insn (gen_rotqby_ti (s1, s1, r0));
420 if (GET_CODE (r0) == CONST_INT)
421 r1 = GEN_INT (INTVAL (r0) & 15);
422 else
423 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
424 emit_insn (gen_shlqby_ti (mask, mask, r1));
425 emit_insn (gen_selb (s0, s1, s0, mask));
430 else if (GET_CODE (src) == SUBREG)
432 rtx r = SUBREG_REG (src);
433 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
434 s0 = gen_reg_rtx (TImode);
435 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
436 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
437 else
438 emit_move_insn (s0, src);
440 else
442 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
443 s0 = gen_reg_rtx (TImode);
444 emit_move_insn (s0, src);
447 /* Now s0 is TImode and contains the bits to extract at start. */
449 if (start)
450 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
452 if (128 - width)
453 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
455 emit_move_insn (dst, s0);
458 void
459 spu_expand_insv (rtx ops[])
461 HOST_WIDE_INT width = INTVAL (ops[1]);
462 HOST_WIDE_INT start = INTVAL (ops[2]);
463 unsigned HOST_WIDE_INT maskbits;
464 machine_mode dst_mode;
465 rtx dst = ops[0], src = ops[3];
466 int dst_size;
467 rtx mask;
468 rtx shift_reg;
469 int shift;
472 if (GET_CODE (ops[0]) == MEM)
473 dst = gen_reg_rtx (TImode);
474 else
475 dst = adjust_operand (dst, &start);
476 dst_mode = GET_MODE (dst);
477 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
479 if (CONSTANT_P (src))
481 machine_mode m =
482 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
483 src = force_reg (m, convert_to_mode (m, src, 0));
485 src = adjust_operand (src, 0);
487 mask = gen_reg_rtx (dst_mode);
488 shift_reg = gen_reg_rtx (dst_mode);
489 shift = dst_size - start - width;
491 /* It's not safe to use subreg here because the compiler assumes
492 that the SUBREG_REG is right justified in the SUBREG. */
493 convert_move (shift_reg, src, 1);
495 if (shift > 0)
497 switch (dst_mode)
499 case E_SImode:
500 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
501 break;
502 case E_DImode:
503 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
504 break;
505 case E_TImode:
506 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
507 break;
508 default:
509 abort ();
512 else if (shift < 0)
513 abort ();
515 switch (dst_size)
517 case 32:
518 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
519 if (start)
520 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
521 emit_move_insn (mask, GEN_INT (maskbits));
522 break;
523 case 64:
524 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
525 if (start)
526 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
527 emit_move_insn (mask, GEN_INT (maskbits));
528 break;
529 case 128:
531 unsigned char arr[16];
532 int i = start / 8;
533 memset (arr, 0, sizeof (arr));
534 arr[i] = 0xff >> (start & 7);
535 for (i++; i <= (start + width - 1) / 8; i++)
536 arr[i] = 0xff;
537 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
538 emit_move_insn (mask, array_to_constant (TImode, arr));
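/* E.g. start = 12, width = 8 gives arr[1] = 0x0f and arr[2] = 0xf0,
   i.e. ones exactly in bit positions 12..19 of the quadword.  */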
540 break;
541 default:
542 abort ();
544 if (GET_CODE (ops[0]) == MEM)
546 rtx low = gen_reg_rtx (SImode);
547 rtx rotl = gen_reg_rtx (SImode);
548 rtx mask0 = gen_reg_rtx (TImode);
549 rtx addr;
550 rtx addr0;
551 rtx addr1;
552 rtx mem;
554 addr = force_reg (Pmode, XEXP (ops[0], 0));
555 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
556 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
557 emit_insn (gen_negsi2 (rotl, low));
558 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
559 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
560 mem = change_address (ops[0], TImode, addr0);
561 set_mem_alias_set (mem, 0);
562 emit_move_insn (dst, mem);
563 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
564 if (start + width > MEM_ALIGN (ops[0]))
566 rtx shl = gen_reg_rtx (SImode);
567 rtx mask1 = gen_reg_rtx (TImode);
568 rtx dst1 = gen_reg_rtx (TImode);
569 rtx mem1;
570 addr1 = plus_constant (Pmode, addr, 16);
571 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
572 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
573 emit_insn (gen_shlqby_ti (mask1, mask, shl));
574 mem1 = change_address (ops[0], TImode, addr1);
575 set_mem_alias_set (mem1, 0);
576 emit_move_insn (dst1, mem1);
577 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
578 emit_move_insn (mem1, dst1);
580 emit_move_insn (mem, dst);
582 else
583 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
588 spu_expand_block_move (rtx ops[])
590 HOST_WIDE_INT bytes, align, offset;
591 rtx src, dst, sreg, dreg, target;
592 int i;
593 if (GET_CODE (ops[2]) != CONST_INT
594 || GET_CODE (ops[3]) != CONST_INT
595 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
596 return 0;
598 bytes = INTVAL (ops[2]);
599 align = INTVAL (ops[3]);
601 if (bytes <= 0)
602 return 1;
604 dst = ops[0];
605 src = ops[1];
607 if (align == 16)
609 for (offset = 0; offset + 16 <= bytes; offset += 16)
611 dst = adjust_address (ops[0], V16QImode, offset);
612 src = adjust_address (ops[1], V16QImode, offset);
613 emit_move_insn (dst, src);
615 if (offset < bytes)
617 rtx mask;
618 unsigned char arr[16] = { 0 };
619 for (i = 0; i < bytes - offset; i++)
620 arr[i] = 0xff;
621 dst = adjust_address (ops[0], V16QImode, offset);
622 src = adjust_address (ops[1], V16QImode, offset);
623 mask = gen_reg_rtx (V16QImode);
624 sreg = gen_reg_rtx (V16QImode);
625 dreg = gen_reg_rtx (V16QImode);
626 target = gen_reg_rtx (V16QImode);
627 emit_move_insn (mask, array_to_constant (V16QImode, arr));
628 emit_move_insn (dreg, dst);
629 emit_move_insn (sreg, src);
630 emit_insn (gen_selb (target, dreg, sreg, mask));
631 emit_move_insn (dst, target);
633 return 1;
635 return 0;
638 enum spu_comp_code
639 { SPU_EQ, SPU_GT, SPU_GTU };
641 int spu_comp_icode[12][3] = {
642 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
643 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
644 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
645 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
646 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
647 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
648 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
649 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
650 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
651 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
652 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
653 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
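/* Rows are indexed by operand mode (see the switch in
   spu_emit_branch_or_set below) and columns by spu_comp_code, so an
   SImode greater-than compare uses spu_comp_icode[2][SPU_GT], i.e.
   CODE_FOR_cgt_si; a 0 entry means no such pattern exists (there are
   no unsigned floating-point compares).  */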
656 /* Generate a compare for CODE. Return a brand-new rtx that represents
657 the result of the compare. GCC can figure this out too if we don't
658 provide all variations of compares, but since GCC always wants to use
659 WORD_MODE, we can generate better code in most cases if we do it
660 ourselves. */
661 void
662 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
664 int reverse_compare = 0;
665 int reverse_test = 0;
666 rtx compare_result, eq_result;
667 rtx comp_rtx, eq_rtx;
668 machine_mode comp_mode;
669 machine_mode op_mode;
670 enum spu_comp_code scode, eq_code;
671 enum insn_code ior_code;
672 enum rtx_code code = GET_CODE (cmp);
673 rtx op0 = XEXP (cmp, 0);
674 rtx op1 = XEXP (cmp, 1);
675 int index;
676 int eq_test = 0;
678 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
679 and so on, to keep the constant in operand 1. */
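/* E.g. (x >= 10) becomes (x > 9), so a single cgt/cgti against the
   constant can be used instead of swapping the operands and negating
   the test.  */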
680 if (GET_CODE (op1) == CONST_INT)
682 HOST_WIDE_INT val = INTVAL (op1) - 1;
683 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
684 switch (code)
686 case GE:
687 op1 = GEN_INT (val);
688 code = GT;
689 break;
690 case LT:
691 op1 = GEN_INT (val);
692 code = LE;
693 break;
694 case GEU:
695 op1 = GEN_INT (val);
696 code = GTU;
697 break;
698 case LTU:
699 op1 = GEN_INT (val);
700 code = LEU;
701 break;
702 default:
703 break;
707 /* However, if we generate an integer result, performing a reverse test
708 would require an extra negation, so avoid that where possible. */
709 if (GET_CODE (op1) == CONST_INT && is_set == 1)
711 HOST_WIDE_INT val = INTVAL (op1) + 1;
712 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
713 switch (code)
715 case LE:
716 op1 = GEN_INT (val);
717 code = LT;
718 break;
719 case LEU:
720 op1 = GEN_INT (val);
721 code = LTU;
722 break;
723 default:
724 break;
728 comp_mode = SImode;
729 op_mode = GET_MODE (op0);
731 switch (code)
733 case GE:
734 scode = SPU_GT;
735 if (HONOR_NANS (op_mode))
737 reverse_compare = 0;
738 reverse_test = 0;
739 eq_test = 1;
740 eq_code = SPU_EQ;
742 else
744 reverse_compare = 1;
745 reverse_test = 1;
747 break;
748 case LE:
749 scode = SPU_GT;
750 if (HONOR_NANS (op_mode))
752 reverse_compare = 1;
753 reverse_test = 0;
754 eq_test = 1;
755 eq_code = SPU_EQ;
757 else
759 reverse_compare = 0;
760 reverse_test = 1;
762 break;
763 case LT:
764 reverse_compare = 1;
765 reverse_test = 0;
766 scode = SPU_GT;
767 break;
768 case GEU:
769 reverse_compare = 1;
770 reverse_test = 1;
771 scode = SPU_GTU;
772 break;
773 case LEU:
774 reverse_compare = 0;
775 reverse_test = 1;
776 scode = SPU_GTU;
777 break;
778 case LTU:
779 reverse_compare = 1;
780 reverse_test = 0;
781 scode = SPU_GTU;
782 break;
783 case NE:
784 reverse_compare = 0;
785 reverse_test = 1;
786 scode = SPU_EQ;
787 break;
789 case EQ:
790 scode = SPU_EQ;
791 break;
792 case GT:
793 scode = SPU_GT;
794 break;
795 case GTU:
796 scode = SPU_GTU;
797 break;
798 default:
799 scode = SPU_EQ;
800 break;
803 switch (op_mode)
805 case E_QImode:
806 index = 0;
807 comp_mode = QImode;
808 break;
809 case E_HImode:
810 index = 1;
811 comp_mode = HImode;
812 break;
813 case E_SImode:
814 index = 2;
815 break;
816 case E_DImode:
817 index = 3;
818 break;
819 case E_TImode:
820 index = 4;
821 break;
822 case E_SFmode:
823 index = 5;
824 break;
825 case E_DFmode:
826 index = 6;
827 break;
828 case E_V16QImode:
829 index = 7;
830 comp_mode = op_mode;
831 break;
832 case E_V8HImode:
833 index = 8;
834 comp_mode = op_mode;
835 break;
836 case E_V4SImode:
837 index = 9;
838 comp_mode = op_mode;
839 break;
840 case E_V4SFmode:
841 index = 10;
842 comp_mode = V4SImode;
843 break;
844 case E_V2DFmode:
845 index = 11;
846 comp_mode = V2DImode;
847 break;
848 case E_V2DImode:
849 default:
850 abort ();
853 if (GET_MODE (op1) == DFmode
854 && (scode != SPU_GT && scode != SPU_EQ))
855 abort ();
857 if (is_set == 0 && op1 == const0_rtx
858 && (GET_MODE (op0) == SImode
859 || GET_MODE (op0) == HImode
860 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
862 /* Don't need to set a register with the result when we are
863 comparing against zero and branching. */
864 reverse_test = !reverse_test;
865 compare_result = op0;
867 else
869 compare_result = gen_reg_rtx (comp_mode);
871 if (reverse_compare)
873 rtx t = op1;
874 op1 = op0;
875 op0 = t;
878 if (spu_comp_icode[index][scode] == 0)
879 abort ();
881 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
882 (op0, op_mode))
883 op0 = force_reg (op_mode, op0);
884 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
885 (op1, op_mode))
886 op1 = force_reg (op_mode, op1);
887 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
888 op0, op1);
889 if (comp_rtx == 0)
890 abort ();
891 emit_insn (comp_rtx);
893 if (eq_test)
895 eq_result = gen_reg_rtx (comp_mode);
896 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
897 op0, op1);
898 if (eq_rtx == 0)
899 abort ();
900 emit_insn (eq_rtx);
901 ior_code = optab_handler (ior_optab, comp_mode);
902 gcc_assert (ior_code != CODE_FOR_nothing);
903 emit_insn (GEN_FCN (ior_code)
904 (compare_result, compare_result, eq_result));
908 if (is_set == 0)
910 rtx bcomp;
911 rtx loc_ref;
913 /* We don't have branch on QI compare insns, so we convert the
914 QI compare result to a HI result. */
915 if (comp_mode == QImode)
917 rtx old_res = compare_result;
918 compare_result = gen_reg_rtx (HImode);
919 comp_mode = HImode;
920 emit_insn (gen_extendqihi2 (compare_result, old_res));
923 if (reverse_test)
924 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
925 else
926 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
928 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
929 emit_jump_insn (gen_rtx_SET (pc_rtx,
930 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
931 loc_ref, pc_rtx)));
933 else if (is_set == 2)
935 rtx target = operands[0];
936 int compare_size = GET_MODE_BITSIZE (comp_mode);
937 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
938 machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
939 rtx select_mask;
940 rtx op_t = operands[2];
941 rtx op_f = operands[3];
943 /* The result of the comparison can be SI, HI or QI mode. Create a
944 mask based on that result. */
945 if (target_size > compare_size)
947 select_mask = gen_reg_rtx (mode);
948 emit_insn (gen_extend_compare (select_mask, compare_result));
950 else if (target_size < compare_size)
951 select_mask =
952 gen_rtx_SUBREG (mode, compare_result,
953 (compare_size - target_size) / BITS_PER_UNIT);
954 else if (comp_mode != mode)
955 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
956 else
957 select_mask = compare_result;
959 if (GET_MODE (target) != GET_MODE (op_t)
960 || GET_MODE (target) != GET_MODE (op_f))
961 abort ();
963 if (reverse_test)
964 emit_insn (gen_selb (target, op_t, op_f, select_mask));
965 else
966 emit_insn (gen_selb (target, op_f, op_t, select_mask));
968 else
970 rtx target = operands[0];
971 if (reverse_test)
972 emit_insn (gen_rtx_SET (compare_result,
973 gen_rtx_NOT (comp_mode, compare_result)));
974 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
975 emit_insn (gen_extendhisi2 (target, compare_result));
976 else if (GET_MODE (target) == SImode
977 && GET_MODE (compare_result) == QImode)
978 emit_insn (gen_extend_compare (target, compare_result));
979 else
980 emit_move_insn (target, compare_result);
984 HOST_WIDE_INT
985 const_double_to_hwint (rtx x)
987 HOST_WIDE_INT val;
988 if (GET_MODE (x) == SFmode)
989 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
990 else if (GET_MODE (x) == DFmode)
992 long l[2];
993 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
994 val = l[0];
995 val = (val << 32) | (l[1] & 0xffffffff);
997 else
998 abort ();
999 return val;
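/* The value returned is the raw IEEE-754 bit pattern, e.g. a DFmode
   const_double of 1.0 yields 0x3ff0000000000000.  */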
1003 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1005 long tv[2];
1006 REAL_VALUE_TYPE rv;
1007 gcc_assert (mode == SFmode || mode == DFmode);
1009 if (mode == SFmode)
1010 tv[0] = (v << 32) >> 32;
1011 else if (mode == DFmode)
1013 tv[1] = (v << 32) >> 32;
1014 tv[0] = v >> 32;
1016 real_from_target (&rv, tv, mode);
1017 return const_double_from_real_value (rv, mode);
1020 void
1021 print_operand_address (FILE * file, register rtx addr)
1023 rtx reg;
1024 rtx offset;
1026 if (GET_CODE (addr) == AND
1027 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1028 && INTVAL (XEXP (addr, 1)) == -16)
1029 addr = XEXP (addr, 0);
1031 switch (GET_CODE (addr))
1033 case REG:
1034 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1035 break;
1037 case PLUS:
1038 reg = XEXP (addr, 0);
1039 offset = XEXP (addr, 1);
1040 if (GET_CODE (offset) == REG)
1042 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1043 reg_names[REGNO (offset)]);
1045 else if (GET_CODE (offset) == CONST_INT)
1047 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1048 INTVAL (offset), reg_names[REGNO (reg)]);
1050 else
1051 abort ();
1052 break;
1054 case CONST:
1055 case LABEL_REF:
1056 case SYMBOL_REF:
1057 case CONST_INT:
1058 output_addr_const (file, addr);
1059 break;
1061 default:
1062 debug_rtx (addr);
1063 abort ();
1067 void
1068 print_operand (FILE * file, rtx x, int code)
1070 machine_mode mode = GET_MODE (x);
1071 HOST_WIDE_INT val;
1072 unsigned char arr[16];
1073 int xcode = GET_CODE (x);
1074 int i, info;
1075 if (GET_MODE (x) == VOIDmode)
1076 switch (code)
1078 case 'L': /* 128 bits, signed */
1079 case 'm': /* 128 bits, signed */
1080 case 'T': /* 128 bits, signed */
1081 case 't': /* 128 bits, signed */
1082 mode = TImode;
1083 break;
1084 case 'K': /* 64 bits, signed */
1085 case 'k': /* 64 bits, signed */
1086 case 'D': /* 64 bits, signed */
1087 case 'd': /* 64 bits, signed */
1088 mode = DImode;
1089 break;
1090 case 'J': /* 32 bits, signed */
1091 case 'j': /* 32 bits, signed */
1092 case 's': /* 32 bits, signed */
1093 case 'S': /* 32 bits, signed */
1094 mode = SImode;
1095 break;
1097 switch (code)
1100 case 'j': /* 32 bits, signed */
1101 case 'k': /* 64 bits, signed */
1102 case 'm': /* 128 bits, signed */
1103 if (xcode == CONST_INT
1104 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1106 gcc_assert (logical_immediate_p (x, mode));
1107 constant_to_array (mode, x, arr);
1108 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1109 val = trunc_int_for_mode (val, SImode);
1110 switch (which_logical_immediate (val))
1112 case SPU_ORI:
1113 break;
1114 case SPU_ORHI:
1115 fprintf (file, "h");
1116 break;
1117 case SPU_ORBI:
1118 fprintf (file, "b");
1119 break;
1120 default:
1121 gcc_unreachable();
1124 else
1125 gcc_unreachable();
1126 return;
1128 case 'J': /* 32 bits, signed */
1129 case 'K': /* 64 bits, signed */
1130 case 'L': /* 128 bits, signed */
1131 if (xcode == CONST_INT
1132 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1134 gcc_assert (logical_immediate_p (x, mode)
1135 || iohl_immediate_p (x, mode));
1136 constant_to_array (mode, x, arr);
1137 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1138 val = trunc_int_for_mode (val, SImode);
1139 switch (which_logical_immediate (val))
1141 case SPU_ORI:
1142 case SPU_IOHL:
1143 break;
1144 case SPU_ORHI:
1145 val = trunc_int_for_mode (val, HImode);
1146 break;
1147 case SPU_ORBI:
1148 val = trunc_int_for_mode (val, QImode);
1149 break;
1150 default:
1151 gcc_unreachable();
1153 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1155 else
1156 gcc_unreachable();
1157 return;
1159 case 't': /* 128 bits, signed */
1160 case 'd': /* 64 bits, signed */
1161 case 's': /* 32 bits, signed */
1162 if (CONSTANT_P (x))
1164 enum immediate_class c = classify_immediate (x, mode);
1165 switch (c)
1167 case IC_IL1:
1168 constant_to_array (mode, x, arr);
1169 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1170 val = trunc_int_for_mode (val, SImode);
1171 switch (which_immediate_load (val))
1173 case SPU_IL:
1174 break;
1175 case SPU_ILA:
1176 fprintf (file, "a");
1177 break;
1178 case SPU_ILH:
1179 fprintf (file, "h");
1180 break;
1181 case SPU_ILHU:
1182 fprintf (file, "hu");
1183 break;
1184 default:
1185 gcc_unreachable ();
1187 break;
1188 case IC_CPAT:
1189 constant_to_array (mode, x, arr);
1190 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1191 if (info == 1)
1192 fprintf (file, "b");
1193 else if (info == 2)
1194 fprintf (file, "h");
1195 else if (info == 4)
1196 fprintf (file, "w");
1197 else if (info == 8)
1198 fprintf (file, "d");
1199 break;
1200 case IC_IL1s:
1201 if (xcode == CONST_VECTOR)
1203 x = CONST_VECTOR_ELT (x, 0);
1204 xcode = GET_CODE (x);
1206 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1207 fprintf (file, "a");
1208 else if (xcode == HIGH)
1209 fprintf (file, "hu");
1210 break;
1211 case IC_FSMBI:
1212 case IC_FSMBI2:
1213 case IC_IL2:
1214 case IC_IL2s:
1215 case IC_POOL:
1216 abort ();
1219 else
1220 gcc_unreachable ();
1221 return;
1223 case 'T': /* 128 bits, signed */
1224 case 'D': /* 64 bits, signed */
1225 case 'S': /* 32 bits, signed */
1226 if (CONSTANT_P (x))
1228 enum immediate_class c = classify_immediate (x, mode);
1229 switch (c)
1231 case IC_IL1:
1232 constant_to_array (mode, x, arr);
1233 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1234 val = trunc_int_for_mode (val, SImode);
1235 switch (which_immediate_load (val))
1237 case SPU_IL:
1238 case SPU_ILA:
1239 break;
1240 case SPU_ILH:
1241 case SPU_ILHU:
1242 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1243 break;
1244 default:
1245 gcc_unreachable ();
1247 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1248 break;
1249 case IC_FSMBI:
1250 constant_to_array (mode, x, arr);
1251 val = 0;
1252 for (i = 0; i < 16; i++)
1254 val <<= 1;
1255 val |= arr[i] & 1;
1257 print_operand (file, GEN_INT (val), 0);
1258 break;
1259 case IC_CPAT:
1260 constant_to_array (mode, x, arr);
1261 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1262 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1263 break;
1264 case IC_IL1s:
1265 if (xcode == HIGH)
1266 x = XEXP (x, 0);
1267 if (GET_CODE (x) == CONST_VECTOR)
1268 x = CONST_VECTOR_ELT (x, 0);
1269 output_addr_const (file, x);
1270 if (xcode == HIGH)
1271 fprintf (file, "@h");
1272 break;
1273 case IC_IL2:
1274 case IC_IL2s:
1275 case IC_FSMBI2:
1276 case IC_POOL:
1277 abort ();
1280 else
1281 gcc_unreachable ();
1282 return;
1284 case 'C':
1285 if (xcode == CONST_INT)
1287 /* Only the 4 least significant bits are relevant for generating
1288 control word instructions. */
1289 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1290 return;
1292 break;
1294 case 'M': /* print code for c*d */
1295 if (GET_CODE (x) == CONST_INT)
1296 switch (INTVAL (x))
1298 case 1:
1299 fprintf (file, "b");
1300 break;
1301 case 2:
1302 fprintf (file, "h");
1303 break;
1304 case 4:
1305 fprintf (file, "w");
1306 break;
1307 case 8:
1308 fprintf (file, "d");
1309 break;
1310 default:
1311 gcc_unreachable();
1313 else
1314 gcc_unreachable();
1315 return;
1317 case 'N': /* Negate the operand */
1318 if (xcode == CONST_INT)
1319 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1320 else if (xcode == CONST_VECTOR)
1321 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1322 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1323 return;
1325 case 'I': /* enable/disable interrupts */
1326 if (xcode == CONST_INT)
1327 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1328 return;
1330 case 'b': /* branch modifiers */
1331 if (xcode == REG)
1332 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1333 else if (COMPARISON_P (x))
1334 fprintf (file, "%s", xcode == NE ? "n" : "");
1335 return;
1337 case 'i': /* indirect call */
1338 if (xcode == MEM)
1340 if (GET_CODE (XEXP (x, 0)) == REG)
1341 /* Used in indirect function calls. */
1342 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1343 else
1344 output_address (GET_MODE (x), XEXP (x, 0));
1346 return;
1348 case 'p': /* load/store */
1349 if (xcode == MEM)
1351 x = XEXP (x, 0);
1352 xcode = GET_CODE (x);
1354 if (xcode == AND)
1356 x = XEXP (x, 0);
1357 xcode = GET_CODE (x);
1359 if (xcode == REG)
1360 fprintf (file, "d");
1361 else if (xcode == CONST_INT)
1362 fprintf (file, "a");
1363 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1364 fprintf (file, "r");
1365 else if (xcode == PLUS || xcode == LO_SUM)
1367 if (GET_CODE (XEXP (x, 1)) == REG)
1368 fprintf (file, "x");
1369 else
1370 fprintf (file, "d");
1372 return;
1374 case 'e':
1375 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1376 val &= 0x7;
1377 output_addr_const (file, GEN_INT (val));
1378 return;
1380 case 'f':
1381 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1382 val &= 0x1f;
1383 output_addr_const (file, GEN_INT (val));
1384 return;
1386 case 'g':
1387 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1388 val &= 0x3f;
1389 output_addr_const (file, GEN_INT (val));
1390 return;
1392 case 'h':
1393 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1394 val = (val >> 3) & 0x1f;
1395 output_addr_const (file, GEN_INT (val));
1396 return;
1398 case 'E':
1399 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1400 val = -val;
1401 val &= 0x7;
1402 output_addr_const (file, GEN_INT (val));
1403 return;
1405 case 'F':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val = -val;
1408 val &= 0x1f;
1409 output_addr_const (file, GEN_INT (val));
1410 return;
1412 case 'G':
1413 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1414 val = -val;
1415 val &= 0x3f;
1416 output_addr_const (file, GEN_INT (val));
1417 return;
1419 case 'H':
1420 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1421 val = -(val & -8ll);
1422 val = (val >> 3) & 0x1f;
1423 output_addr_const (file, GEN_INT (val));
1424 return;
1426 case 'v':
1427 case 'w':
1428 constant_to_array (mode, x, arr);
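/* This extracts the IEEE single-precision exponent field of the first
   32-bit element and removes the bias of 127.  */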
1429 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1430 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1431 return;
1433 case 0:
1434 if (xcode == REG)
1435 fprintf (file, "%s", reg_names[REGNO (x)]);
1436 else if (xcode == MEM)
1437 output_address (GET_MODE (x), XEXP (x, 0));
1438 else if (xcode == CONST_VECTOR)
1439 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1440 else
1441 output_addr_const (file, x);
1442 return;
1444 /* unused letters
1445 o qr u yz
1446 AB OPQR UVWXYZ */
1447 default:
1448 output_operand_lossage ("invalid %%xn code");
1450 gcc_unreachable ();
1453 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1454 caller saved register. For leaf functions it is more efficient to
1455 use a volatile register because we won't need to save and restore the
1456 pic register. This routine is only valid after register allocation
1457 is completed, so we can pick an unused register. */
1458 static rtx
1459 get_pic_reg (void)
1461 if (!reload_completed && !reload_in_progress)
1462 abort ();
1464 /* If we've already made the decision, we need to keep with it. Once we've
1465 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1466 return true since the register is now live; this should not cause us to
1467 "switch back" to using pic_offset_table_rtx. */
1468 if (!cfun->machine->pic_reg)
1470 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1471 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1472 else
1473 cfun->machine->pic_reg = pic_offset_table_rtx;
1476 return cfun->machine->pic_reg;
1479 /* Split constant addresses to handle cases that are too large.
1480 Add in the pic register when in PIC mode.
1481 Split immediates that require more than 1 instruction. */
1483 spu_split_immediate (rtx * ops)
1485 machine_mode mode = GET_MODE (ops[0]);
1486 enum immediate_class c = classify_immediate (ops[1], mode);
1488 switch (c)
1490 case IC_IL2:
1492 unsigned char arrhi[16];
1493 unsigned char arrlo[16];
1494 rtx to, temp, hi, lo;
1495 int i;
1496 machine_mode imode = mode;
1497 /* We need to do reals as ints because the constant used in the
1498 IOR might not be a legitimate real constant. */
1499 imode = int_mode_for_mode (mode);
1500 constant_to_array (mode, ops[1], arrhi);
1501 if (imode != mode)
1502 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1503 else
1504 to = ops[0];
1505 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1506 for (i = 0; i < 16; i += 4)
1508 arrlo[i + 2] = arrhi[i + 2];
1509 arrlo[i + 3] = arrhi[i + 3];
1510 arrlo[i + 0] = arrlo[i + 1] = 0;
1511 arrhi[i + 2] = arrhi[i + 3] = 0;
1513 hi = array_to_constant (imode, arrhi);
1514 lo = array_to_constant (imode, arrlo);
1515 emit_move_insn (temp, hi);
1516 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1517 return 1;
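/* E.g. an SImode 0x12345678 is split above into 0x12340000 (loaded
   with ilhu) and 0x00005678 (merged in with iohl via the IOR).  */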
1519 case IC_FSMBI2:
1521 unsigned char arr_fsmbi[16];
1522 unsigned char arr_andbi[16];
1523 rtx to, reg_fsmbi, reg_and;
1524 int i;
1525 machine_mode imode = mode;
1526 /* We need to do reals as ints because the constant used in the
1527 * AND might not be a legitimate real constant. */
1528 imode = int_mode_for_mode (mode);
1529 constant_to_array (mode, ops[1], arr_fsmbi);
1530 if (imode != mode)
1531 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1532 else
1533 to = ops[0];
1534 for (i = 0; i < 16; i++)
1535 if (arr_fsmbi[i] != 0)
1537 arr_andbi[0] = arr_fsmbi[i];
1538 arr_fsmbi[i] = 0xff;
1540 for (i = 1; i < 16; i++)
1541 arr_andbi[i] = arr_andbi[0];
1542 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1543 reg_and = array_to_constant (imode, arr_andbi);
1544 emit_move_insn (to, reg_fsmbi);
1545 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1546 return 1;
1548 case IC_POOL:
1549 if (reload_in_progress || reload_completed)
1551 rtx mem = force_const_mem (mode, ops[1]);
1552 if (TARGET_LARGE_MEM)
1554 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1555 emit_move_insn (addr, XEXP (mem, 0));
1556 mem = replace_equiv_address (mem, addr);
1558 emit_move_insn (ops[0], mem);
1559 return 1;
1561 break;
1562 case IC_IL1s:
1563 case IC_IL2s:
1564 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1566 if (c == IC_IL2s)
1568 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1569 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1571 else if (flag_pic)
1572 emit_insn (gen_pic (ops[0], ops[1]));
1573 if (flag_pic)
1575 rtx pic_reg = get_pic_reg ();
1576 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1578 return flag_pic || c == IC_IL2s;
1580 break;
1581 case IC_IL1:
1582 case IC_FSMBI:
1583 case IC_CPAT:
1584 break;
1586 return 0;
1589 /* SAVING is TRUE when we are generating the actual load and store
1590 instructions for REGNO. When determining the size of the stack
1591 needed for saving registers we must allocate enough space for the
1592 worst case, because we don't always have the information early enough
1593 to not allocate it. But we can at least eliminate the actual loads
1594 and stores during the prologue/epilogue. */
1595 static int
1596 need_to_save_reg (int regno, int saving)
1598 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1599 return 1;
1600 if (flag_pic
1601 && regno == PIC_OFFSET_TABLE_REGNUM
1602 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1603 return 1;
1604 return 0;
1607 /* This function is only correct starting with local register
1608 allocation */
1610 spu_saved_regs_size (void)
1612 int reg_save_size = 0;
1613 int regno;
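/* Each saved register occupies a full 16-byte quadword slot, since
   frame_emit_store/frame_emit_load move whole V4SImode registers.  */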
1615 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1616 if (need_to_save_reg (regno, 0))
1617 reg_save_size += 0x10;
1618 return reg_save_size;
1621 static rtx_insn *
1622 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1624 rtx reg = gen_rtx_REG (V4SImode, regno);
1625 rtx mem =
1626 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1627 return emit_insn (gen_movv4si (mem, reg));
1630 static rtx_insn *
1631 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1633 rtx reg = gen_rtx_REG (V4SImode, regno);
1634 rtx mem =
1635 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1636 return emit_insn (gen_movv4si (reg, mem));
1639 /* This happens after reload, so we need to expand it. */
1640 static rtx_insn *
1641 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1643 rtx_insn *insn;
1644 if (satisfies_constraint_K (GEN_INT (imm)))
1646 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1648 else
1650 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1651 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1652 if (REGNO (src) == REGNO (scratch))
1653 abort ();
1655 return insn;
1658 /* Return nonzero if this function is known to have a null epilogue. */
1661 direct_return (void)
1663 if (reload_completed)
1665 if (cfun->static_chain_decl == 0
1666 && (spu_saved_regs_size ()
1667 + get_frame_size ()
1668 + crtl->outgoing_args_size
1669 + crtl->args.pretend_args_size == 0)
1670 && crtl->is_leaf)
1671 return 1;
1673 return 0;
1677 The stack frame looks like this:
1678 +-------------+
1679 | incoming |
1680 | args |
1681 AP -> +-------------+
1682 | $lr save |
1683 +-------------+
1684 prev SP | back chain |
1685 +-------------+
1686 | var args |
1687 | reg save | crtl->args.pretend_args_size bytes
1688 +-------------+
1689 | ... |
1690 | saved regs | spu_saved_regs_size() bytes
1691 FP -> +-------------+
1692 | ... |
1693 | vars | get_frame_size() bytes
1694 HFP -> +-------------+
1695 | ... |
1696 | outgoing |
1697 | args | crtl->outgoing_args_size bytes
1698 +-------------+
1699 | $lr of next |
1700 | frame |
1701 +-------------+
1702 | back chain |
1703 SP -> +-------------+
1706 void
1707 spu_expand_prologue (void)
1709 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1710 HOST_WIDE_INT total_size;
1711 HOST_WIDE_INT saved_regs_size;
1712 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1713 rtx scratch_reg_0, scratch_reg_1;
1714 rtx_insn *insn;
1715 rtx real;
1717 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1718 cfun->machine->pic_reg = pic_offset_table_rtx;
1720 if (spu_naked_function_p (current_function_decl))
1721 return;
1723 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1724 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1726 saved_regs_size = spu_saved_regs_size ();
1727 total_size = size + saved_regs_size
1728 + crtl->outgoing_args_size
1729 + crtl->args.pretend_args_size;
1731 if (!crtl->is_leaf
1732 || cfun->calls_alloca || total_size > 0)
1733 total_size += STACK_POINTER_OFFSET;
1735 /* Save this first because code after this might use the link
1736 register as a scratch register. */
1737 if (!crtl->is_leaf)
1739 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1740 RTX_FRAME_RELATED_P (insn) = 1;
1743 if (total_size > 0)
1745 offset = -crtl->args.pretend_args_size;
1746 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1747 if (need_to_save_reg (regno, 1))
1749 offset -= 16;
1750 insn = frame_emit_store (regno, sp_reg, offset);
1751 RTX_FRAME_RELATED_P (insn) = 1;
1755 if (flag_pic && cfun->machine->pic_reg)
1757 rtx pic_reg = cfun->machine->pic_reg;
1758 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1759 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1762 if (total_size > 0)
1764 if (flag_stack_check)
1766 /* We compare against total_size-1 because
1767 ($sp >= total_size) <=> ($sp > total_size-1) */
1768 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1769 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1770 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1771 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1773 emit_move_insn (scratch_v4si, size_v4si);
1774 size_v4si = scratch_v4si;
1776 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1777 emit_insn (gen_vec_extractv4sisi
1778 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1779 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
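/* Per the SPU ABI, word 1 of $sp holds the number of bytes of available
   stack space; the sequence above extracts that element of the vector
   compare and halts (heq) when it is not greater than total_size - 1,
   i.e. when the new frame would overflow the stack.  */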
1782 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1783 the value of the previous $sp because we save it as the back
1784 chain. */
1785 if (total_size <= 2000)
1787 /* In this case we save the back chain first. */
1788 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1789 insn =
1790 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1792 else
1794 insn = emit_move_insn (scratch_reg_0, sp_reg);
1795 insn =
1796 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1798 RTX_FRAME_RELATED_P (insn) = 1;
1799 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1800 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1802 if (total_size > 2000)
1804 /* Save the back chain ptr */
1805 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1808 if (frame_pointer_needed)
1810 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1811 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1812 + crtl->outgoing_args_size;
1813 /* Set the new frame_pointer */
1814 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1815 RTX_FRAME_RELATED_P (insn) = 1;
1816 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1817 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1818 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1822 if (flag_stack_usage_info)
1823 current_function_static_stack_size = total_size;
1826 void
1827 spu_expand_epilogue (bool sibcall_p)
1829 int size = get_frame_size (), offset, regno;
1830 HOST_WIDE_INT saved_regs_size, total_size;
1831 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1832 rtx scratch_reg_0;
1834 if (spu_naked_function_p (current_function_decl))
1835 return;
1837 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1839 saved_regs_size = spu_saved_regs_size ();
1840 total_size = size + saved_regs_size
1841 + crtl->outgoing_args_size
1842 + crtl->args.pretend_args_size;
1844 if (!crtl->is_leaf
1845 || cfun->calls_alloca || total_size > 0)
1846 total_size += STACK_POINTER_OFFSET;
1848 if (total_size > 0)
1850 if (cfun->calls_alloca)
1851 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1852 else
1853 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1856 if (saved_regs_size > 0)
1858 offset = -crtl->args.pretend_args_size;
1859 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1860 if (need_to_save_reg (regno, 1))
1862 offset -= 0x10;
1863 frame_emit_load (regno, sp_reg, offset);
1868 if (!crtl->is_leaf)
1869 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1871 if (!sibcall_p)
1873 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1874 emit_jump_insn (gen__return ());
1879 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1881 if (count != 0)
1882 return 0;
1883 /* This is inefficient because it ends up copying to a save-register
1884 which then gets saved even though $lr has already been saved. But
1885 it does generate better code for leaf functions and we don't need
1886 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1887 used for __builtin_return_address anyway, so maybe we don't care if
1888 it's inefficient. */
1889 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1893 /* Given VAL, generate a constant appropriate for MODE.
1894 If MODE is a vector mode, every element will be VAL.
1895 For TImode, VAL will be zero extended to 128 bits. */
1897 spu_const (machine_mode mode, HOST_WIDE_INT val)
1899 rtx inner;
1900 rtvec v;
1901 int units, i;
1903 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1904 || GET_MODE_CLASS (mode) == MODE_FLOAT
1905 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1906 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1908 if (GET_MODE_CLASS (mode) == MODE_INT)
1909 return immed_double_const (val, 0, mode);
1911 /* val is the bit representation of the float */
1912 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1913 return hwint_to_const_double (mode, val);
1915 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1916 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1917 else
1918 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1920 units = GET_MODE_NUNITS (mode);
1922 v = rtvec_alloc (units);
1924 for (i = 0; i < units; ++i)
1925 RTVEC_ELT (v, i) = inner;
1927 return gen_rtx_CONST_VECTOR (mode, v);
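/* E.g. spu_const (SImode, 5) is just (const_int 5), while
   spu_const (V4SImode, 1) builds (const_vector:V4SI [1 1 1 1]).  */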
1930 /* Create a MODE vector constant from 4 ints. */
1932 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1934 unsigned char arr[16];
1935 arr[0] = (a >> 24) & 0xff;
1936 arr[1] = (a >> 16) & 0xff;
1937 arr[2] = (a >> 8) & 0xff;
1938 arr[3] = (a >> 0) & 0xff;
1939 arr[4] = (b >> 24) & 0xff;
1940 arr[5] = (b >> 16) & 0xff;
1941 arr[6] = (b >> 8) & 0xff;
1942 arr[7] = (b >> 0) & 0xff;
1943 arr[8] = (c >> 24) & 0xff;
1944 arr[9] = (c >> 16) & 0xff;
1945 arr[10] = (c >> 8) & 0xff;
1946 arr[11] = (c >> 0) & 0xff;
1947 arr[12] = (d >> 24) & 0xff;
1948 arr[13] = (d >> 16) & 0xff;
1949 arr[14] = (d >> 8) & 0xff;
1950 arr[15] = (d >> 0) & 0xff;
1951 return array_to_constant(mode, arr);
1954 /* branch hint stuff */
1956 /* An array of these is used to propagate hints to predecessor blocks. */
1957 struct spu_bb_info
1959 rtx_insn *prop_jump; /* propagated from another block */
1960 int bb_index; /* the original block. */
1962 static struct spu_bb_info *spu_bb_info;
1964 #define STOP_HINT_P(INSN) \
1965 (CALL_P(INSN) \
1966 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1967 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1969 /* 1 when RTX is a hinted branch or its target. We keep track of
1970 what has been hinted so the safe-hint code can test it easily. */
1971 #define HINTED_P(RTX) \
1972 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1974 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1975 #define SCHED_ON_EVEN_P(RTX) \
1976 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1978 /* Emit a nop for INSN such that the two will dual issue. This assumes
1979 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1980 We check for TImode to handle a MULTI1 insn which has dual issued its
1981 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1982 static void
1983 emit_nop_for_insn (rtx_insn *insn)
1985 int p;
1986 rtx_insn *new_insn;
1988 /* We need to handle JUMP_TABLE_DATA separately. */
1989 if (JUMP_TABLE_DATA_P (insn))
1991 new_insn = emit_insn_after (gen_lnop(), insn);
1992 recog_memoized (new_insn);
1993 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1994 return;
1997 p = get_pipe (insn);
1998 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1999 new_insn = emit_insn_after (gen_lnop (), insn);
2000 else if (p == 1 && GET_MODE (insn) == TImode)
2002 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2003 PUT_MODE (new_insn, TImode);
2004 PUT_MODE (insn, VOIDmode);
2006 else
2007 new_insn = emit_insn_after (gen_lnop (), insn);
2008 recog_memoized (new_insn);
2009 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2012 /* Insert nops in basic blocks to meet dual issue alignment
2013 requirements. Also make sure hbrp and hint instructions are at least
2014 one cycle apart, possibly inserting a nop. */
2015 static void
2016 pad_bb(void)
2018 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2019 int length;
2020 int addr;
2022 /* This sets up INSN_ADDRESSES. */
2023 shorten_branches (get_insns ());
2025 /* Keep track of length added by nops. */
2026 length = 0;
2028 prev_insn = 0;
2029 insn = get_insns ();
2030 if (!active_insn_p (insn))
2031 insn = next_active_insn (insn);
2032 for (; insn; insn = next_insn)
2034 next_insn = next_active_insn (insn);
2035 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2036 || INSN_CODE (insn) == CODE_FOR_hbr)
2038 if (hbr_insn)
2040 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2041 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2042 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2043 || (a1 - a0 == 4))
2045 prev_insn = emit_insn_before (gen_lnop (), insn);
2046 PUT_MODE (prev_insn, GET_MODE (insn));
2047 PUT_MODE (insn, TImode);
2048 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2049 length += 4;
2052 hbr_insn = insn;
2054 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2056 if (GET_MODE (insn) == TImode)
2057 PUT_MODE (next_insn, TImode);
2058 insn = next_insn;
2059 next_insn = next_active_insn (insn);
2061 addr = INSN_ADDRESSES (INSN_UID (insn));
2062 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2064 if (((addr + length) & 7) != 0)
2066 emit_nop_for_insn (prev_insn);
2067 length += 4;
2070 else if (GET_MODE (insn) == TImode
2071 && ((next_insn && GET_MODE (next_insn) != TImode)
2072 || get_attr_type (insn) == TYPE_MULTI0)
2073 && ((addr + length) & 7) != 0)
2075 /* prev_insn will always be set because the first insn is
2076 always 8-byte aligned. */
2077 emit_nop_for_insn (prev_insn);
2078 length += 4;
2080 prev_insn = insn;
2085 /* Routines for branch hints. */
2087 static void
2088 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2089 int distance, sbitmap blocks)
2091 rtx_insn *hint;
2092 rtx_insn *insn;
2093 rtx_jump_table_data *table;
2095 if (before == 0 || branch == 0 || target == 0)
2096 return;
2098 /* While scheduling we require hints to be no further than 600 bytes, so
2099 we need to enforce that here too */
2100 if (distance > 600)
2101 return;
2104 /* If BEFORE is a basic block note, emit the hint after the note. */
2104 if (NOTE_INSN_BASIC_BLOCK_P (before))
2105 before = NEXT_INSN (before);
2107 rtx_code_label *branch_label = gen_label_rtx ();
2108 LABEL_NUSES (branch_label)++;
2109 LABEL_PRESERVE_P (branch_label) = 1;
2110 insn = emit_label_before (branch_label, branch);
2111 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2112 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2114 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2115 recog_memoized (hint);
2116 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2117 HINTED_P (branch) = 1;
2119 if (GET_CODE (target) == LABEL_REF)
2120 HINTED_P (XEXP (target, 0)) = 1;
2121 else if (tablejump_p (branch, 0, &table))
2123 rtvec vec;
2124 int j;
2125 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2126 vec = XVEC (PATTERN (table), 0);
2127 else
2128 vec = XVEC (PATTERN (table), 1);
2129 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2130 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2133 if (distance >= 588)
2135 /* Make sure the hint isn't scheduled any earlier than this point,
2136 which could make it too far for the branch offset to fit. */
2137 insn = emit_insn_before (gen_blockage (), hint);
2138 recog_memoized (insn);
2139 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2141 else if (distance <= 8 * 4)
2143 /* To guarantee at least 8 insns between the hint and branch we
2144 insert nops. */
2145 int d;
2146 for (d = distance; d < 8 * 4; d += 4)
2148 insn =
2149 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2150 recog_memoized (insn);
2151 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2154 /* Make sure any nops inserted aren't scheduled before the hint. */
2155 insn = emit_insn_after (gen_blockage (), hint);
2156 recog_memoized (insn);
2157 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2159 /* Make sure any nops inserted aren't scheduled after the call. */
2160 if (CALL_P (branch) && distance < 8 * 4)
2162 insn = emit_insn_before (gen_blockage (), branch);
2163 recog_memoized (insn);
2164 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2169 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2170 the rtx for the branch target. */
2171 static rtx
2172 get_branch_target (rtx_insn *branch)
2174 if (JUMP_P (branch))
2176 rtx set, src;
2178 /* Return statements */
2179 if (GET_CODE (PATTERN (branch)) == RETURN)
2180 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2182 /* ASM GOTOs. */
2183 if (extract_asm_operands (PATTERN (branch)) != NULL)
2184 return NULL;
2186 set = single_set (branch);
2187 src = SET_SRC (set);
2188 if (GET_CODE (SET_DEST (set)) != PC)
2189 abort ();
2191 if (GET_CODE (src) == IF_THEN_ELSE)
2193 rtx lab = 0;
2194 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2195 if (note)
2197 /* If the more probable case is not a fall through, then
2198 try a branch hint. */
2199 int prob = profile_probability::from_reg_br_prob_note
2200 (XINT (note, 0)).to_reg_br_prob_base ();
2201 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2202 && GET_CODE (XEXP (src, 1)) != PC)
2203 lab = XEXP (src, 1);
2204 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2205 && GET_CODE (XEXP (src, 2)) != PC)
2206 lab = XEXP (src, 2);
2208 if (lab)
2210 if (GET_CODE (lab) == RETURN)
2211 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2212 return lab;
2214 return 0;
2217 return src;
2219 else if (CALL_P (branch))
2221 rtx call;
2222 /* All of our call patterns are in a PARALLEL and the CALL is
2223 the first pattern in the PARALLEL. */
2224 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2225 abort ();
2226 call = XVECEXP (PATTERN (branch), 0, 0);
2227 if (GET_CODE (call) == SET)
2228 call = SET_SRC (call);
2229 if (GET_CODE (call) != CALL)
2230 abort ();
2231 return XEXP (XEXP (call, 0), 0);
2233 return 0;
2236 /* The special $hbr register is used to prevent the insn scheduler from
2237 moving hbr insns across instructions which invalidate them. It
2238 should only be used in a clobber, and this function searches for
2239 insns which clobber it. */
2240 static bool
2241 insn_clobbers_hbr (rtx_insn *insn)
2243 if (INSN_P (insn)
2244 && GET_CODE (PATTERN (insn)) == PARALLEL)
2246 rtx parallel = PATTERN (insn);
2247 rtx clobber;
2248 int j;
2249 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2251 clobber = XVECEXP (parallel, 0, j);
2252 if (GET_CODE (clobber) == CLOBBER
2253 && GET_CODE (XEXP (clobber, 0)) == REG
2254 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2255 return 1;
2258 return 0;
2261 /* Search up to 32 insns starting at FIRST:
2262 - at any kind of hinted branch, just return
2263 - at any unconditional branch in the first 15 insns, just return
2264 - at a call or indirect branch, after the first 15 insns, force it to
2265 an even address and return
2266 - at any unconditional branch, after the first 15 insns, force it to
2267 an even address.
2268 At the end of the search, insert an hbrp within 4 insns of FIRST,
2269 and another hbrp within 16 instructions of FIRST.  */
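/* Illustrative walk-through (added commentary, not from the original
   source), roughly: BEFORE_4 becomes the first insn whose end is at
   least 16 bytes (4 insns) past FIRST, and BEFORE_16 the first whose
   end is at least 56 bytes (14 insns) past FIRST.  If the scan reaches
   128 bytes (32 insns) without hitting a hinted branch, call, or
   barrier, an iprefetch (hbrp) is emitted before each of them,
   optionally followed by an lnop to keep it two cycles away from a
   nearby hint.  */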
2271 static void
2272 insert_hbrp_for_ilb_runout (rtx_insn *first)
2274 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2275 int addr = 0, length, first_addr = -1;
2276 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2277 int insert_lnop_after = 0;
2278 for (insn = first; insn; insn = NEXT_INSN (insn))
2279 if (INSN_P (insn))
2281 if (first_addr == -1)
2282 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2283 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2284 length = get_attr_length (insn);
2286 if (before_4 == 0 && addr + length >= 4 * 4)
2287 before_4 = insn;
2288 /* We test for 14 instructions because the first hbrp will add
2289 up to 2 instructions. */
2290 if (before_16 == 0 && addr + length >= 14 * 4)
2291 before_16 = insn;
2293 if (INSN_CODE (insn) == CODE_FOR_hbr)
2295 /* Make sure an hbrp is at least 2 cycles away from a hint.
2296 Insert an lnop after the hbrp when necessary. */
2297 if (before_4 == 0 && addr > 0)
2299 before_4 = insn;
2300 insert_lnop_after |= 1;
2302 else if (before_4 && addr <= 4 * 4)
2303 insert_lnop_after |= 1;
2304 if (before_16 == 0 && addr > 10 * 4)
2306 before_16 = insn;
2307 insert_lnop_after |= 2;
2309 else if (before_16 && addr <= 14 * 4)
2310 insert_lnop_after |= 2;
2313 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2315 if (addr < hbrp_addr0)
2316 hbrp_addr0 = addr;
2317 else if (addr < hbrp_addr1)
2318 hbrp_addr1 = addr;
2321 if (CALL_P (insn) || JUMP_P (insn))
2323 if (HINTED_P (insn))
2324 return;
2326 /* Any branch after the first 15 insns should be on an even
2327 address to avoid a special case branch. There might be
2328 some nops and/or hbrps inserted, so we test after 10
2329 insns. */
2330 if (addr > 10 * 4)
2331 SCHED_ON_EVEN_P (insn) = 1;
2334 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2335 return;
2338 if (addr + length >= 32 * 4)
2340 gcc_assert (before_4 && before_16);
2341 if (hbrp_addr0 > 4 * 4)
2343 insn =
2344 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2345 recog_memoized (insn);
2346 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2347 INSN_ADDRESSES_NEW (insn,
2348 INSN_ADDRESSES (INSN_UID (before_4)));
2349 PUT_MODE (insn, GET_MODE (before_4));
2350 PUT_MODE (before_4, TImode);
2351 if (insert_lnop_after & 1)
2353 insn = emit_insn_before (gen_lnop (), before_4);
2354 recog_memoized (insn);
2355 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2356 INSN_ADDRESSES_NEW (insn,
2357 INSN_ADDRESSES (INSN_UID (before_4)));
2358 PUT_MODE (insn, TImode);
2361 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2362 && hbrp_addr1 > 16 * 4)
2364 insn =
2365 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2366 recog_memoized (insn);
2367 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2368 INSN_ADDRESSES_NEW (insn,
2369 INSN_ADDRESSES (INSN_UID (before_16)));
2370 PUT_MODE (insn, GET_MODE (before_16));
2371 PUT_MODE (before_16, TImode);
2372 if (insert_lnop_after & 2)
2374 insn = emit_insn_before (gen_lnop (), before_16);
2375 recog_memoized (insn);
2376 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2377 INSN_ADDRESSES_NEW (insn,
2378 INSN_ADDRESSES (INSN_UID
2379 (before_16)));
2380 PUT_MODE (insn, TImode);
2383 return;
2386 else if (BARRIER_P (insn))
2387 return;
2391 /* The SPU might hang when it executes 48 inline instructions after a
2392 hinted branch jumps to its hinted target. The beginning of a
2393 function and the return from a call might have been hinted, and
2394 must be handled as well. To prevent a hang we insert 2 hbrps. The
2395 first should be within 6 insns of the branch target. The second
2396 should be within 22 insns of the branch target. When determining
2397 if hbrps are necessary, we look for only 32 inline instructions,
2398 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2399 when inserting new hbrps, we insert them within 4 and 16 insns of
2400 the target. */
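/* Illustrative arithmetic (added commentary, not from the original
   source): scanning 32 inline insns while allowing for up to 12 nops
   and 4 hbrps that this pass may insert gives 32 + 12 + 4 = 48, the
   size of the runout window described above.  Likewise, placing the
   new hbrps within 4 and 16 insns leaves room for the hbrps and nops
   themselves, keeping the final positions within the 6 and 22 insns
   required.  */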
2401 static void
2402 insert_hbrp (void)
2404 rtx_insn *insn;
2405 if (TARGET_SAFE_HINTS)
2407 shorten_branches (get_insns ());
2408 /* Insert hbrp at beginning of function */
2409 insn = next_active_insn (get_insns ());
2410 if (insn)
2411 insert_hbrp_for_ilb_runout (insn);
2412 /* Insert hbrp after hinted targets. */
2413 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2414 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2415 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2419 static int in_spu_reorg;
2421 static void
2422 spu_var_tracking (void)
2424 if (flag_var_tracking)
2426 df_analyze ();
2427 timevar_push (TV_VAR_TRACKING);
2428 variable_tracking_main ();
2429 timevar_pop (TV_VAR_TRACKING);
2430 df_finish_pass (false);
2434 /* Insert branch hints. There are no branch optimizations after this
2435 pass, so it's safe to set our branch hints now. */
2436 static void
2437 spu_machine_dependent_reorg (void)
2439 sbitmap blocks;
2440 basic_block bb;
2441 rtx_insn *branch, *insn;
2442 rtx branch_target = 0;
2443 int branch_addr = 0, insn_addr, required_dist = 0;
2444 int i;
2445 unsigned int j;
2447 if (!TARGET_BRANCH_HINTS || optimize == 0)
2449 /* We still do it for unoptimized code because an external
2450 function might have hinted a call or return. */
2451 compute_bb_for_insn ();
2452 insert_hbrp ();
2453 pad_bb ();
2454 spu_var_tracking ();
2455 free_bb_for_insn ();
2456 return;
2459 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2460 bitmap_clear (blocks);
2462 in_spu_reorg = 1;
2463 compute_bb_for_insn ();
2465 /* (Re-)discover loops so that bb->loop_father can be used
2466 in the analysis below. */
2467 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2469 compact_blocks ();
2471 spu_bb_info =
2472 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2473 sizeof (struct spu_bb_info));
2475 /* We need exact insn addresses and lengths. */
2476 shorten_branches (get_insns ());
2478 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2480 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2481 branch = 0;
2482 if (spu_bb_info[i].prop_jump)
2484 branch = spu_bb_info[i].prop_jump;
2485 branch_target = get_branch_target (branch);
2486 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2487 required_dist = spu_hint_dist;
2489 /* Search from end of a block to beginning. In this loop, find
2490 jumps which need a branch hint, and emit the hint only when:
2491 - it's an indirect branch and we're at the insn which sets
2492 the register
2493 - we're at an insn that will invalidate the hint. e.g., a
2494 call, another hint insn, inline asm that clobbers $hbr, and
2495 some inlined operations (divmodsi4). Don't consider jumps
2496 because they are only at the end of a block and are
2497 considered when we are deciding whether to propagate
2498 - we're getting too far away from the branch. The hbr insns
2499 only have a signed 10 bit offset
2500 We go back as far as possible so the branch will be considered
2501 for propagation when we get to the beginning of the block. */
2502 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2504 if (INSN_P (insn))
2506 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2507 if (branch
2508 && ((GET_CODE (branch_target) == REG
2509 && set_of (branch_target, insn) != NULL_RTX)
2510 || insn_clobbers_hbr (insn)
2511 || branch_addr - insn_addr > 600))
2513 rtx_insn *next = NEXT_INSN (insn);
2514 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2515 if (insn != BB_END (bb)
2516 && branch_addr - next_addr >= required_dist)
2518 if (dump_file)
2519 fprintf (dump_file,
2520 "hint for %i in block %i before %i\n",
2521 INSN_UID (branch), bb->index,
2522 INSN_UID (next));
2523 spu_emit_branch_hint (next, branch, branch_target,
2524 branch_addr - next_addr, blocks);
2526 branch = 0;
2529 /* JUMP_P will only be true at the end of a block. When
2530 branch is already set it means we've previously decided
2531 to propagate a hint for that branch into this block. */
2532 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2534 branch = 0;
2535 if ((branch_target = get_branch_target (insn)))
2537 branch = insn;
2538 branch_addr = insn_addr;
2539 required_dist = spu_hint_dist;
2543 if (insn == BB_HEAD (bb))
2544 break;
2547 if (branch)
2549 /* If we haven't emitted a hint for this branch yet, it might
2550 be profitable to emit it in one of the predecessor blocks,
2551 especially for loops. */
2552 rtx_insn *bbend;
2553 basic_block prev = 0, prop = 0, prev2 = 0;
2554 int loop_exit = 0, simple_loop = 0;
2555 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2557 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2558 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2559 prev = EDGE_PRED (bb, j)->src;
2560 else
2561 prev2 = EDGE_PRED (bb, j)->src;
2563 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2564 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2565 loop_exit = 1;
2566 else if (EDGE_SUCC (bb, j)->dest == bb)
2567 simple_loop = 1;
2569 /* If this branch is a loop exit then propagate to previous
2570 fallthru block. This catches the cases when it is a simple
2571 loop or when there is an initial branch into the loop. */
2572 if (prev && (loop_exit || simple_loop)
2573 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2574 prop = prev;
2576 /* If there is only one adjacent predecessor, don't propagate
2577 outside this loop. */
2578 else if (prev && single_pred_p (bb)
2579 && prev->loop_father == bb->loop_father)
2580 prop = prev;
2582 /* If this is the JOIN block of a simple IF-THEN then
2583 propagate the hint to the HEADER block. */
2584 else if (prev && prev2
2585 && EDGE_COUNT (bb->preds) == 2
2586 && EDGE_COUNT (prev->preds) == 1
2587 && EDGE_PRED (prev, 0)->src == prev2
2588 && prev2->loop_father == bb->loop_father
2589 && GET_CODE (branch_target) != REG)
2590 prop = prev;
2592 /* Don't propagate when:
2593 - this is a simple loop and the hint would be too far
2594 - this is not a simple loop and there are 16 insns in
2595 this block already
2596 - the predecessor block ends in a branch that will be
2597 hinted
2598 - the predecessor block ends in an insn that invalidates
2599 the hint */
2600 if (prop
2601 && prop->index >= 0
2602 && (bbend = BB_END (prop))
2603 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2604 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2605 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2607 if (dump_file)
2608 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2609 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2610 bb->index, prop->index, bb_loop_depth (bb),
2611 INSN_UID (branch), loop_exit, simple_loop,
2612 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2614 spu_bb_info[prop->index].prop_jump = branch;
2615 spu_bb_info[prop->index].bb_index = i;
2617 else if (branch_addr - next_addr >= required_dist)
2619 if (dump_file)
2620 fprintf (dump_file, "hint for %i in block %i before %i\n",
2621 INSN_UID (branch), bb->index,
2622 INSN_UID (NEXT_INSN (insn)));
2623 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2624 branch_addr - next_addr, blocks);
2626 branch = 0;
2629 free (spu_bb_info);
2631 if (!bitmap_empty_p (blocks))
2632 find_many_sub_basic_blocks (blocks);
2634 /* We have to schedule to make sure alignment is ok. */
2635 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2637 /* The hints need to be scheduled, so call it again. */
2638 schedule_insns ();
2639 df_finish_pass (true);
2641 insert_hbrp ();
2643 pad_bb ();
2645 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2646 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2648 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2649 between its branch label and the branch. We don't move the
2650 label because GCC expects it at the beginning of the block. */
2651 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2652 rtx label_ref = XVECEXP (unspec, 0, 0);
2653 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2654 rtx_insn *branch;
2655 int offset = 0;
2656 for (branch = NEXT_INSN (label);
2657 !JUMP_P (branch) && !CALL_P (branch);
2658 branch = NEXT_INSN (branch))
2659 if (NONJUMP_INSN_P (branch))
2660 offset += get_attr_length (branch);
2661 if (offset > 0)
2662 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2665 spu_var_tracking ();
2667 loop_optimizer_finalize ();
2669 free_bb_for_insn ();
2671 in_spu_reorg = 0;
2675 /* Insn scheduling routines, primarily for dual issue. */
2676 static int
2677 spu_sched_issue_rate (void)
2679 return 2;
2682 static int
2683 uses_ls_unit(rtx_insn *insn)
2685 rtx set = single_set (insn);
2686 if (set != 0
2687 && (GET_CODE (SET_DEST (set)) == MEM
2688 || GET_CODE (SET_SRC (set)) == MEM))
2689 return 1;
2690 return 0;
2693 static int
2694 get_pipe (rtx_insn *insn)
2696 enum attr_type t;
2697 /* Handle inline asm */
2698 if (INSN_CODE (insn) == -1)
2699 return -1;
2700 t = get_attr_type (insn);
2701 switch (t)
2703 case TYPE_CONVERT:
2704 return -2;
2705 case TYPE_MULTI0:
2706 return -1;
2708 case TYPE_FX2:
2709 case TYPE_FX3:
2710 case TYPE_SPR:
2711 case TYPE_NOP:
2712 case TYPE_FXB:
2713 case TYPE_FPD:
2714 case TYPE_FP6:
2715 case TYPE_FP7:
2716 return 0;
2718 case TYPE_LNOP:
2719 case TYPE_SHUF:
2720 case TYPE_LOAD:
2721 case TYPE_STORE:
2722 case TYPE_BR:
2723 case TYPE_MULTI1:
2724 case TYPE_HBR:
2725 case TYPE_IPREFETCH:
2726 return 1;
2727 default:
2728 abort ();
2733 /* haifa-sched.c has a static variable that keeps track of the current
2734 cycle. It is passed to spu_sched_reorder, and we record it here for
2735 use by spu_sched_variable_issue. It won't be accurate if the
2736 scheduler updates its clock_var between the two calls. */
2737 static int clock_var;
2739 /* This is used to keep track of insn alignment. Set to 0 at the
2740 beginning of each block and increased by the "length" attr of each
2741 insn scheduled. */
2742 static int spu_sched_length;
2744 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2745 ready list appropriately in spu_sched_reorder(). */
2746 static int pipe0_clock;
2747 static int pipe1_clock;
2749 static int prev_clock_var;
2751 static int prev_priority;
2753 /* The SPU needs to load the next ilb sometime during the execution of
2754 the previous ilb. There is a potential conflict if every cycle has a
2755 load or store. To avoid the conflict we make sure the load/store
2756 unit is free for at least one cycle during the execution of insns in
2757 the previous ilb. */
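/* Illustrative note (added commentary, not from the original source):
   spu_ls_first records the value of spu_sched_length at the start of a
   run of consecutive load/store cycles (see spu_sched_variable_issue).
   Roughly, once the run reaches 15 cycles (4 * 15 bytes of code) and
   the best pipe-1 candidate is yet another load/store,
   spu_sched_reorder emits an iprefetch instead, which breaks the run
   and leaves the load/store unit free for the instruction fetch.  */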
2758 static int spu_ls_first;
2759 static int prev_ls_clock;
2761 static void
2762 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2763 int max_ready ATTRIBUTE_UNUSED)
2765 spu_sched_length = 0;
2768 static void
2769 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2770 int max_ready ATTRIBUTE_UNUSED)
2772 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2774 /* When any block might be at least 8-byte aligned, assume they
2775 will all be at least 8-byte aligned to make sure dual issue
2776 works out correctly. */
2777 spu_sched_length = 0;
2779 spu_ls_first = INT_MAX;
2780 clock_var = -1;
2781 prev_ls_clock = -1;
2782 pipe0_clock = -1;
2783 pipe1_clock = -1;
2784 prev_clock_var = -1;
2785 prev_priority = -1;
2788 static int
2789 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2790 int verbose ATTRIBUTE_UNUSED,
2791 rtx_insn *insn, int more)
2793 int len;
2794 int p;
2795 if (GET_CODE (PATTERN (insn)) == USE
2796 || GET_CODE (PATTERN (insn)) == CLOBBER
2797 || (len = get_attr_length (insn)) == 0)
2798 return more;
2800 spu_sched_length += len;
2802 /* Reset on inline asm */
2803 if (INSN_CODE (insn) == -1)
2805 spu_ls_first = INT_MAX;
2806 pipe0_clock = -1;
2807 pipe1_clock = -1;
2808 return 0;
2810 p = get_pipe (insn);
2811 if (p == 0)
2812 pipe0_clock = clock_var;
2813 else
2814 pipe1_clock = clock_var;
2816 if (in_spu_reorg)
2818 if (clock_var - prev_ls_clock > 1
2819 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2820 spu_ls_first = INT_MAX;
2821 if (uses_ls_unit (insn))
2823 if (spu_ls_first == INT_MAX)
2824 spu_ls_first = spu_sched_length;
2825 prev_ls_clock = clock_var;
2828 /* The scheduler hasn't inserted the nop, but we will later on.
2829 Include those nops in spu_sched_length. */
2830 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2831 spu_sched_length += 4;
2832 prev_clock_var = clock_var;
2834 /* more is -1 when called from spu_sched_reorder for new insns
2835 that don't have INSN_PRIORITY */
2836 if (more >= 0)
2837 prev_priority = INSN_PRIORITY (insn);
2840 /* Always try issuing more insns. spu_sched_reorder will decide
2841 when the cycle should be advanced. */
2842 return 1;
2845 /* This function is called for both TARGET_SCHED_REORDER and
2846 TARGET_SCHED_REORDER2. */
2847 static int
2848 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2849 rtx_insn **ready, int *nreadyp, int clock)
2851 int i, nready = *nreadyp;
2852 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2853 rtx_insn *insn;
2855 clock_var = clock;
2857 if (nready <= 0 || pipe1_clock >= clock)
2858 return 0;
2860 /* Find any rtl insns that don't generate assembly insns and schedule
2861 them first. */
2862 for (i = nready - 1; i >= 0; i--)
2864 insn = ready[i];
2865 if (INSN_CODE (insn) == -1
2866 || INSN_CODE (insn) == CODE_FOR_blockage
2867 || (INSN_P (insn) && get_attr_length (insn) == 0))
2869 ready[i] = ready[nready - 1];
2870 ready[nready - 1] = insn;
2871 return 1;
2875 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2876 for (i = 0; i < nready; i++)
2877 if (INSN_CODE (ready[i]) != -1)
2879 insn = ready[i];
2880 switch (get_attr_type (insn))
2882 default:
2883 case TYPE_MULTI0:
2884 case TYPE_CONVERT:
2885 case TYPE_FX2:
2886 case TYPE_FX3:
2887 case TYPE_SPR:
2888 case TYPE_NOP:
2889 case TYPE_FXB:
2890 case TYPE_FPD:
2891 case TYPE_FP6:
2892 case TYPE_FP7:
2893 pipe_0 = i;
2894 break;
2895 case TYPE_LOAD:
2896 case TYPE_STORE:
2897 pipe_ls = i;
2898 /* FALLTHRU */
2899 case TYPE_LNOP:
2900 case TYPE_SHUF:
2901 case TYPE_BR:
2902 case TYPE_MULTI1:
2903 case TYPE_HBR:
2904 pipe_1 = i;
2905 break;
2906 case TYPE_IPREFETCH:
2907 pipe_hbrp = i;
2908 break;
2912 /* In the first scheduling phase, schedule loads and stores together
2913 to increase the chance they will get merged during postreload CSE. */
2914 if (!reload_completed && pipe_ls >= 0)
2916 insn = ready[pipe_ls];
2917 ready[pipe_ls] = ready[nready - 1];
2918 ready[nready - 1] = insn;
2919 return 1;
2922 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2923 if (pipe_hbrp >= 0)
2924 pipe_1 = pipe_hbrp;
2926 /* When we have loads/stores in every cycle of the last 15 insns and
2927 we are about to schedule another load/store, emit an hbrp insn
2928 instead. */
2929 if (in_spu_reorg
2930 && spu_sched_length - spu_ls_first >= 4 * 15
2931 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2933 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2934 recog_memoized (insn);
2935 if (pipe0_clock < clock)
2936 PUT_MODE (insn, TImode);
2937 spu_sched_variable_issue (file, verbose, insn, -1);
2938 return 0;
2941 /* In general, we want to emit nops to increase dual issue, but dual
2942 issue isn't faster when one of the insns could be scheduled later
2943 without affecting the critical path. We look at INSN_PRIORITY to
2944 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2945 used to tune it. */
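/* Illustrative note (added commentary, not from the original source):
   with -mdual-nops=N, the checks below only keep working toward dual
   issue when the best remaining pipe-1 candidate has an INSN_PRIORITY
   within N of the previously issued insn (prev_priority); when the gap
   is larger, the cycle is advanced (even address) or the highest
   priority insn is taken regardless of pipe (odd address).  */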
2946 if (in_spu_reorg && spu_dual_nops < 10)
2948 /* When we are at an even address and we are not issuing nops to
2949 improve scheduling then we need to advance the cycle. */
2950 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2951 && (spu_dual_nops == 0
2952 || (pipe_1 != -1
2953 && prev_priority >
2954 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2955 return 0;
2957 /* When at an odd address, schedule the highest priority insn
2958 without considering pipeline. */
2959 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2960 && (spu_dual_nops == 0
2961 || (prev_priority >
2962 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2963 return 1;
2967 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2968 pipe0 insn in the ready list, schedule it. */
2969 if (pipe0_clock < clock && pipe_0 >= 0)
2970 schedule_i = pipe_0;
2972 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2973 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2974 else
2975 schedule_i = pipe_1;
2977 if (schedule_i > -1)
2979 insn = ready[schedule_i];
2980 ready[schedule_i] = ready[nready - 1];
2981 ready[nready - 1] = insn;
2982 return 1;
2984 return 0;
2987 /* INSN is dependent on DEP_INSN. */
2988 static int
2989 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2990 int cost, unsigned int)
2992 rtx set;
2994 /* The blockage pattern is used to prevent instructions from being
2995 moved across it and has no cost. */
2996 if (INSN_CODE (insn) == CODE_FOR_blockage
2997 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2998 return 0;
3000 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3001 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3002 return 0;
3004 /* Make sure hbrps are spread out. */
3005 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3006 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3007 return 8;
3009 /* Make sure hints and hbrps are 2 cycles apart. */
3010 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3011 || INSN_CODE (insn) == CODE_FOR_hbr)
3012 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3013 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3014 return 2;
3016 /* An hbrp has no real dependency on other insns. */
3017 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3018 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3019 return 0;
3021 /* Assuming that it is unlikely an argument register will be used in
3022 the first cycle of the called function, we reduce the cost for
3023 slightly better scheduling of dep_insn. When not hinted, the
3024 mispredicted branch would hide the cost as well. */
3025 if (CALL_P (insn))
3027 rtx target = get_branch_target (insn);
3028 if (GET_CODE (target) != REG || !set_of (target, insn))
3029 return cost - 2;
3030 return cost;
3033 /* And when returning from a function, let's assume the return values
3034 are completed sooner too. */
3035 if (CALL_P (dep_insn))
3036 return cost - 2;
3038 /* Make sure an instruction that loads from the back chain is scheduled
3039 away from the return instruction so a hint is more likely to get
3040 issued. */
3041 if (INSN_CODE (insn) == CODE_FOR__return
3042 && (set = single_set (dep_insn))
3043 && GET_CODE (SET_DEST (set)) == REG
3044 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3045 return 20;
3047 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3048 scheduler makes every insn in a block anti-dependent on the final
3049 jump_insn. We adjust here so higher cost insns will get scheduled
3050 earlier. */
3051 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3052 return insn_cost (dep_insn) - 3;
3054 return cost;
3057 /* Create a CONST_DOUBLE from a string. */
3059 spu_float_const (const char *string, machine_mode mode)
3061 REAL_VALUE_TYPE value;
3062 value = REAL_VALUE_ATOF (string, mode);
3063 return const_double_from_real_value (value, mode);
3067 spu_constant_address_p (rtx x)
3069 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3070 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3071 || GET_CODE (x) == HIGH);
3074 static enum spu_immediate
3075 which_immediate_load (HOST_WIDE_INT val)
3077 gcc_assert (val == trunc_int_for_mode (val, SImode));
3079 if (val >= -0x8000 && val <= 0x7fff)
3080 return SPU_IL;
3081 if (val >= 0 && val <= 0x3ffff)
3082 return SPU_ILA;
3083 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3084 return SPU_ILH;
3085 if ((val & 0xffff) == 0)
3086 return SPU_ILHU;
3088 return SPU_NONE;
3091 /* Return true when OP can be loaded by one of the il instructions, or
3092 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3094 immediate_load_p (rtx op, machine_mode mode)
3096 if (CONSTANT_P (op))
3098 enum immediate_class c = classify_immediate (op, mode);
3099 return c == IC_IL1 || c == IC_IL1s
3100 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3102 return 0;
3105 /* Return true if the first SIZE bytes of ARR are a constant that can be
3106 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3107 represent the size and offset of the instruction to use. */
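/* Illustrative example (added, not part of the original source): the
   hypothetical array below is the kind of pattern a cwd instruction
   produces, with the 4-byte insertion slot at byte offset 8; cpat_info
   returns 1 for it with *prun == 4 and *pstart == 8.  */
#if 0
static const unsigned char example_cwd_pattern[16] = {
  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
  0x00, 0x01, 0x02, 0x03, 0x1c, 0x1d, 0x1e, 0x1f
};
#endif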
3108 static int
3109 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3111 int cpat, run, i, start;
3112 cpat = 1;
3113 run = 0;
3114 start = -1;
3115 for (i = 0; i < size && cpat; i++)
3116 if (arr[i] != i+16)
3118 if (!run)
3120 start = i;
3121 if (arr[i] == 3)
3122 run = 1;
3123 else if (arr[i] == 2 && arr[i+1] == 3)
3124 run = 2;
3125 else if (arr[i] == 0)
3127 while (arr[i+run] == run && i+run < 16)
3128 run++;
3129 if (run != 4 && run != 8)
3130 cpat = 0;
3132 else
3133 cpat = 0;
3134 if ((i & (run-1)) != 0)
3135 cpat = 0;
3136 i += run;
3138 else
3139 cpat = 0;
3141 if (cpat && (run || size < 16))
3143 if (run == 0)
3144 run = 1;
3145 if (prun)
3146 *prun = run;
3147 if (pstart)
3148 *pstart = start == -1 ? 16-run : start;
3149 return 1;
3151 return 0;
3154 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3155 it into a register. MODE is only valid when OP is a CONST_INT. */
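/* Illustrative examples (added commentary, not from the original
   source), using V4SImode vectors with four identical elements:
   - 0x12341234: the repeated word satisfies which_immediate_load
     (SPU_ILH), so the result is IC_IL1 (one instruction).
   - 0x12345678: no il/ila/ilh/ilhu form matches, and the bytes are
     neither an fsmbi mask nor a cpat pattern, so the repeated word
     gives IC_IL2 (e.g. ilhu followed by iohl).
   - 0xff0000ff: every byte is either 0x00 or 0xff, so the fsmbi test
     succeeds and the result is IC_FSMBI.  */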
3156 static enum immediate_class
3157 classify_immediate (rtx op, machine_mode mode)
3159 HOST_WIDE_INT val;
3160 unsigned char arr[16];
3161 int i, j, repeated, fsmbi, repeat;
3163 gcc_assert (CONSTANT_P (op));
3165 if (GET_MODE (op) != VOIDmode)
3166 mode = GET_MODE (op);
3168 /* A V4SI const_vector with all identical symbols is ok. */
3169 if (!flag_pic
3170 && mode == V4SImode
3171 && GET_CODE (op) == CONST_VECTOR
3172 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3173 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3174 op = unwrap_const_vec_duplicate (op);
3176 switch (GET_CODE (op))
3178 case SYMBOL_REF:
3179 case LABEL_REF:
3180 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3182 case CONST:
3183 /* We can never know if the resulting address fits in 18 bits and can be
3184 loaded with ila. For now, assume the address will not overflow if
3185 the displacement is "small" (fits 'K' constraint). */
3186 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3188 rtx sym = XEXP (XEXP (op, 0), 0);
3189 rtx cst = XEXP (XEXP (op, 0), 1);
3191 if (GET_CODE (sym) == SYMBOL_REF
3192 && GET_CODE (cst) == CONST_INT
3193 && satisfies_constraint_K (cst))
3194 return IC_IL1s;
3196 return IC_IL2s;
3198 case HIGH:
3199 return IC_IL1s;
3201 case CONST_VECTOR:
3202 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3203 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3204 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3205 return IC_POOL;
3206 /* Fall through. */
3208 case CONST_INT:
3209 case CONST_DOUBLE:
3210 constant_to_array (mode, op, arr);
3212 /* Check that each 4-byte slot is identical. */
3213 repeated = 1;
3214 for (i = 4; i < 16; i += 4)
3215 for (j = 0; j < 4; j++)
3216 if (arr[j] != arr[i + j])
3217 repeated = 0;
3219 if (repeated)
3221 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3222 val = trunc_int_for_mode (val, SImode);
3224 if (which_immediate_load (val) != SPU_NONE)
3225 return IC_IL1;
3228 /* Any mode of 2 bytes or smaller can be loaded with an il
3229 instruction. */
3230 gcc_assert (GET_MODE_SIZE (mode) > 2);
3232 fsmbi = 1;
3233 repeat = 0;
3234 for (i = 0; i < 16 && fsmbi; i++)
3235 if (arr[i] != 0 && repeat == 0)
3236 repeat = arr[i];
3237 else if (arr[i] != 0 && arr[i] != repeat)
3238 fsmbi = 0;
3239 if (fsmbi)
3240 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3242 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3243 return IC_CPAT;
3245 if (repeated)
3246 return IC_IL2;
3248 return IC_POOL;
3249 default:
3250 break;
3252 gcc_unreachable ();
3255 static enum spu_immediate
3256 which_logical_immediate (HOST_WIDE_INT val)
3258 gcc_assert (val == trunc_int_for_mode (val, SImode));
3260 if (val >= -0x200 && val <= 0x1ff)
3261 return SPU_ORI;
3262 if (val >= 0 && val <= 0xffff)
3263 return SPU_IOHL;
3264 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3266 val = trunc_int_for_mode (val, HImode);
3267 if (val >= -0x200 && val <= 0x1ff)
3268 return SPU_ORHI;
3269 if ((val & 0xff) == ((val >> 8) & 0xff))
3271 val = trunc_int_for_mode (val, QImode);
3272 if (val >= -0x200 && val <= 0x1ff)
3273 return SPU_ORBI;
3276 return SPU_NONE;
3279 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3280 CONST_DOUBLEs. */
3281 static int
3282 const_vector_immediate_p (rtx x)
3284 int i;
3285 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3286 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3287 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3288 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3289 return 0;
3290 return 1;
3294 logical_immediate_p (rtx op, machine_mode mode)
3296 HOST_WIDE_INT val;
3297 unsigned char arr[16];
3298 int i, j;
3300 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3301 || GET_CODE (op) == CONST_VECTOR);
3303 if (GET_CODE (op) == CONST_VECTOR
3304 && !const_vector_immediate_p (op))
3305 return 0;
3307 if (GET_MODE (op) != VOIDmode)
3308 mode = GET_MODE (op);
3310 constant_to_array (mode, op, arr);
3312 /* Check that bytes are repeated. */
3313 for (i = 4; i < 16; i += 4)
3314 for (j = 0; j < 4; j++)
3315 if (arr[j] != arr[i + j])
3316 return 0;
3318 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3319 val = trunc_int_for_mode (val, SImode);
3321 i = which_logical_immediate (val);
3322 return i != SPU_NONE && i != SPU_IOHL;
3326 iohl_immediate_p (rtx op, machine_mode mode)
3328 HOST_WIDE_INT val;
3329 unsigned char arr[16];
3330 int i, j;
3332 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3333 || GET_CODE (op) == CONST_VECTOR);
3335 if (GET_CODE (op) == CONST_VECTOR
3336 && !const_vector_immediate_p (op))
3337 return 0;
3339 if (GET_MODE (op) != VOIDmode)
3340 mode = GET_MODE (op);
3342 constant_to_array (mode, op, arr);
3344 /* Check that bytes are repeated. */
3345 for (i = 4; i < 16; i += 4)
3346 for (j = 0; j < 4; j++)
3347 if (arr[j] != arr[i + j])
3348 return 0;
3350 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3351 val = trunc_int_for_mode (val, SImode);
3353 return val >= 0 && val <= 0xffff;
3357 arith_immediate_p (rtx op, machine_mode mode,
3358 HOST_WIDE_INT low, HOST_WIDE_INT high)
3360 HOST_WIDE_INT val;
3361 unsigned char arr[16];
3362 int bytes, i, j;
3364 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3365 || GET_CODE (op) == CONST_VECTOR);
3367 if (GET_CODE (op) == CONST_VECTOR
3368 && !const_vector_immediate_p (op))
3369 return 0;
3371 if (GET_MODE (op) != VOIDmode)
3372 mode = GET_MODE (op);
3374 constant_to_array (mode, op, arr);
3376 bytes = GET_MODE_UNIT_SIZE (mode);
3377 mode = mode_for_size (GET_MODE_UNIT_BITSIZE (mode), MODE_INT, 0);
3379 /* Check that bytes are repeated. */
3380 for (i = bytes; i < 16; i += bytes)
3381 for (j = 0; j < bytes; j++)
3382 if (arr[j] != arr[i + j])
3383 return 0;
3385 val = arr[0];
3386 for (j = 1; j < bytes; j++)
3387 val = (val << 8) | arr[j];
3389 val = trunc_int_for_mode (val, mode);
3391 return val >= low && val <= high;
3394 /* TRUE when OP is an immediate and an exact power of 2, i.e. when
3395 OP is 2^scale with LOW <= scale <= HIGH. When OP is a vector,
3396 all entries must be the same. */
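/* Illustrative example (added commentary, not from the original
   source): for SFmode, 8.0f has the bit pattern 0x41000000; the
   mantissa bits (val & 0x007fffff) are zero and (0x41000000 >> 23)
   - 127 == 3, so the test below accepts it whenever LOW <= 3 <= HIGH.
   Negative powers of two such as -8.0f (0xc1000000) fail the
   val > 0 check and are rejected.  */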
3397 bool
3398 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3400 machine_mode int_mode;
3401 HOST_WIDE_INT val;
3402 unsigned char arr[16];
3403 int bytes, i, j;
3405 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3406 || GET_CODE (op) == CONST_VECTOR);
3408 if (GET_CODE (op) == CONST_VECTOR
3409 && !const_vector_immediate_p (op))
3410 return 0;
3412 if (GET_MODE (op) != VOIDmode)
3413 mode = GET_MODE (op);
3415 constant_to_array (mode, op, arr);
3417 mode = GET_MODE_INNER (mode);
3419 bytes = GET_MODE_SIZE (mode);
3420 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3422 /* Check that bytes are repeated. */
3423 for (i = bytes; i < 16; i += bytes)
3424 for (j = 0; j < bytes; j++)
3425 if (arr[j] != arr[i + j])
3426 return 0;
3428 val = arr[0];
3429 for (j = 1; j < bytes; j++)
3430 val = (val << 8) | arr[j];
3432 val = trunc_int_for_mode (val, int_mode);
3434 /* Currently, we only handle SFmode */
3435 gcc_assert (mode == SFmode);
3436 if (mode == SFmode)
3438 int exp = (val >> 23) - 127;
3439 return val > 0 && (val & 0x007fffff) == 0
3440 && exp >= low && exp <= high;
3442 return FALSE;
3445 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3447 static bool
3448 ea_symbol_ref_p (const_rtx x)
3450 tree decl;
3452 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3454 rtx plus = XEXP (x, 0);
3455 rtx op0 = XEXP (plus, 0);
3456 rtx op1 = XEXP (plus, 1);
3457 if (GET_CODE (op1) == CONST_INT)
3458 x = op0;
3461 return (GET_CODE (x) == SYMBOL_REF
3462 && (decl = SYMBOL_REF_DECL (x)) != 0
3463 && TREE_CODE (decl) == VAR_DECL
3464 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3467 /* We accept:
3468 - any 32-bit constant (SImode, SFmode)
3469 - any constant that can be generated with fsmbi (any mode)
3470 - a 64-bit constant where the high and low bits are identical
3471 (DImode, DFmode)
3472 - a 128-bit constant where the four 32-bit words match. */
3473 bool
3474 spu_legitimate_constant_p (machine_mode mode, rtx x)
3476 subrtx_iterator::array_type array;
3477 if (GET_CODE (x) == HIGH)
3478 x = XEXP (x, 0);
3480 /* Reject any __ea qualified reference. These can't appear in
3481 instructions but must be forced to the constant pool. */
3482 FOR_EACH_SUBRTX (iter, array, x, ALL)
3483 if (ea_symbol_ref_p (*iter))
3484 return 0;
3486 /* V4SI with all identical symbols is valid. */
3487 if (!flag_pic
3488 && mode == V4SImode
3489 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3490 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3491 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3492 return const_vec_duplicate_p (x);
3494 if (GET_CODE (x) == CONST_VECTOR
3495 && !const_vector_immediate_p (x))
3496 return 0;
3497 return 1;
3500 /* Valid addresses are:
3501 - symbol_ref, label_ref, const
3502 - reg
3503 - reg + const_int, where const_int is 16-byte aligned
3504 - reg + reg, alignment doesn't matter
3505 The alignment matters in the reg+const case because lqd and stqd
3506 ignore the 4 least significant bits of the const. We only care about
3507 16 byte modes because the expand phase will change all smaller MEM
3508 references to TImode. */
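/* Illustrative examples (added commentary, not from the original
   source), assuming the registers are valid base/index registers: for
   a 16-byte mode, (reg), (plus (reg) (const_int 32)) and
   (plus (reg) (reg)) are all accepted below, while
   (plus (reg) (const_int 4)) is rejected because lqd/stqd would
   silently drop the low 4 bits of the displacement.  A plain
   (const_int 0x3ffff) is also accepted as an absolute local-store
   address, since it fits the 18-bit range tested below.  */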
3509 static bool
3510 spu_legitimate_address_p (machine_mode mode,
3511 rtx x, bool reg_ok_strict)
3513 int aligned = GET_MODE_SIZE (mode) >= 16;
3514 if (aligned
3515 && GET_CODE (x) == AND
3516 && GET_CODE (XEXP (x, 1)) == CONST_INT
3517 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3518 x = XEXP (x, 0);
3519 switch (GET_CODE (x))
3521 case LABEL_REF:
3522 return !TARGET_LARGE_MEM;
3524 case SYMBOL_REF:
3525 case CONST:
3526 /* Keep __ea references until reload so that spu_expand_mov can see them
3527 in MEMs. */
3528 if (ea_symbol_ref_p (x))
3529 return !reload_in_progress && !reload_completed;
3530 return !TARGET_LARGE_MEM;
3532 case CONST_INT:
3533 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3535 case SUBREG:
3536 x = XEXP (x, 0);
3537 if (!REG_P (x))
3538 return 0;
3539 /* FALLTHRU */
3541 case REG:
3542 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3544 case PLUS:
3545 case LO_SUM:
3547 rtx op0 = XEXP (x, 0);
3548 rtx op1 = XEXP (x, 1);
3549 if (GET_CODE (op0) == SUBREG)
3550 op0 = XEXP (op0, 0);
3551 if (GET_CODE (op1) == SUBREG)
3552 op1 = XEXP (op1, 0);
3553 if (GET_CODE (op0) == REG
3554 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3555 && GET_CODE (op1) == CONST_INT
3556 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3557 /* If virtual registers are involved, the displacement will
3558 change later on anyway, so checking would be premature.
3559 Reload will make sure the final displacement after
3560 register elimination is OK. */
3561 || op0 == arg_pointer_rtx
3562 || op0 == frame_pointer_rtx
3563 || op0 == virtual_stack_vars_rtx)
3564 && (!aligned || (INTVAL (op1) & 15) == 0))
3565 return TRUE;
3566 if (GET_CODE (op0) == REG
3567 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3568 && GET_CODE (op1) == REG
3569 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3570 return TRUE;
3572 break;
3574 default:
3575 break;
3577 return FALSE;
3580 /* Like spu_legitimate_address_p, except with named addresses. */
3581 static bool
3582 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3583 bool reg_ok_strict, addr_space_t as)
3585 if (as == ADDR_SPACE_EA)
3586 return (REG_P (x) && (GET_MODE (x) == EAmode));
3588 else if (as != ADDR_SPACE_GENERIC)
3589 gcc_unreachable ();
3591 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3594 /* When the address is reg + const_int, force the const_int into a
3595 register. */
3596 static rtx
3597 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3598 machine_mode mode ATTRIBUTE_UNUSED)
3600 rtx op0, op1;
3601 /* Make sure both operands are registers. */
3602 if (GET_CODE (x) == PLUS)
3604 op0 = XEXP (x, 0);
3605 op1 = XEXP (x, 1);
3606 if (ALIGNED_SYMBOL_REF_P (op0))
3608 op0 = force_reg (Pmode, op0);
3609 mark_reg_pointer (op0, 128);
3611 else if (GET_CODE (op0) != REG)
3612 op0 = force_reg (Pmode, op0);
3613 if (ALIGNED_SYMBOL_REF_P (op1))
3615 op1 = force_reg (Pmode, op1);
3616 mark_reg_pointer (op1, 128);
3618 else if (GET_CODE (op1) != REG)
3619 op1 = force_reg (Pmode, op1);
3620 x = gen_rtx_PLUS (Pmode, op0, op1);
3622 return x;
3625 /* Like spu_legitimize_address, except with named address support. */
3626 static rtx
3627 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3628 addr_space_t as)
3630 if (as != ADDR_SPACE_GENERIC)
3631 return x;
3633 return spu_legitimize_address (x, oldx, mode);
3636 /* Reload reg + const_int for out-of-range displacements. */
3638 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3639 int opnum, int type)
3641 bool removed_and = false;
3643 if (GET_CODE (ad) == AND
3644 && CONST_INT_P (XEXP (ad, 1))
3645 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3647 ad = XEXP (ad, 0);
3648 removed_and = true;
3651 if (GET_CODE (ad) == PLUS
3652 && REG_P (XEXP (ad, 0))
3653 && CONST_INT_P (XEXP (ad, 1))
3654 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3655 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3657 /* Unshare the sum. */
3658 ad = copy_rtx (ad);
3660 /* Reload the displacement. */
3661 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3662 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3663 opnum, (enum reload_type) type);
3665 /* Add back AND for alignment if we stripped it. */
3666 if (removed_and)
3667 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3669 return ad;
3672 return NULL_RTX;
3675 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3676 struct attribute_spec.handler. */
3677 static tree
3678 spu_handle_fndecl_attribute (tree * node,
3679 tree name,
3680 tree args ATTRIBUTE_UNUSED,
3681 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3683 if (TREE_CODE (*node) != FUNCTION_DECL)
3685 warning (0, "%qE attribute only applies to functions",
3686 name);
3687 *no_add_attrs = true;
3690 return NULL_TREE;
3693 /* Handle the "vector" attribute. */
3694 static tree
3695 spu_handle_vector_attribute (tree * node, tree name,
3696 tree args ATTRIBUTE_UNUSED,
3697 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3699 tree type = *node, result = NULL_TREE;
3700 machine_mode mode;
3701 int unsigned_p;
3703 while (POINTER_TYPE_P (type)
3704 || TREE_CODE (type) == FUNCTION_TYPE
3705 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3706 type = TREE_TYPE (type);
3708 mode = TYPE_MODE (type);
3710 unsigned_p = TYPE_UNSIGNED (type);
3711 switch (mode)
3713 case E_DImode:
3714 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3715 break;
3716 case E_SImode:
3717 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3718 break;
3719 case E_HImode:
3720 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3721 break;
3722 case E_QImode:
3723 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3724 break;
3725 case E_SFmode:
3726 result = V4SF_type_node;
3727 break;
3728 case E_DFmode:
3729 result = V2DF_type_node;
3730 break;
3731 default:
3732 break;
3735 /* Propagate qualifiers attached to the element type
3736 onto the vector type. */
3737 if (result && result != type && TYPE_QUALS (type))
3738 result = build_qualified_type (result, TYPE_QUALS (type));
3740 *no_add_attrs = true; /* No need to hang on to the attribute. */
3742 if (!result)
3743 warning (0, "%qE attribute ignored", name);
3744 else
3745 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3747 return NULL_TREE;
3750 /* Return nonzero if FUNC is a naked function. */
3751 static int
3752 spu_naked_function_p (tree func)
3754 tree a;
3756 if (TREE_CODE (func) != FUNCTION_DECL)
3757 abort ();
3759 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3760 return a != NULL_TREE;
3764 spu_initial_elimination_offset (int from, int to)
3766 int saved_regs_size = spu_saved_regs_size ();
3767 int sp_offset = 0;
3768 if (!crtl->is_leaf || crtl->outgoing_args_size
3769 || get_frame_size () || saved_regs_size)
3770 sp_offset = STACK_POINTER_OFFSET;
3771 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3772 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3773 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3774 return get_frame_size ();
3775 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3776 return sp_offset + crtl->outgoing_args_size
3777 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3778 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3779 return get_frame_size () + saved_regs_size + sp_offset;
3780 else
3781 gcc_unreachable ();
3785 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3787 machine_mode mode = TYPE_MODE (type);
3788 int byte_size = ((mode == BLKmode)
3789 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3791 /* Make sure small structs are left justified in a register. */
3792 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3793 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3795 machine_mode smode;
3796 rtvec v;
3797 int i;
3798 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3799 int n = byte_size / UNITS_PER_WORD;
3800 v = rtvec_alloc (nregs);
3801 for (i = 0; i < n; i++)
3803 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3804 gen_rtx_REG (TImode,
3805 FIRST_RETURN_REGNUM
3806 + i),
3807 GEN_INT (UNITS_PER_WORD * i));
3808 byte_size -= UNITS_PER_WORD;
3811 if (n < nregs)
3813 if (byte_size < 4)
3814 byte_size = 4;
3815 smode =
3816 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3817 RTVEC_ELT (v, n) =
3818 gen_rtx_EXPR_LIST (VOIDmode,
3819 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3820 GEN_INT (UNITS_PER_WORD * n));
3822 return gen_rtx_PARALLEL (mode, v);
3824 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3827 static rtx
3828 spu_function_arg (cumulative_args_t cum_v,
3829 machine_mode mode,
3830 const_tree type, bool named ATTRIBUTE_UNUSED)
3832 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3833 int byte_size;
3835 if (*cum >= MAX_REGISTER_ARGS)
3836 return 0;
3838 byte_size = ((mode == BLKmode)
3839 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3841 /* The ABI does not allow parameters to be passed partly in
3842 registers and partly on the stack. */
3843 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3844 return 0;
3846 /* Make sure small structs are left justified in a register. */
3847 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3848 && byte_size < UNITS_PER_WORD && byte_size > 0)
3850 machine_mode smode;
3851 rtx gr_reg;
3852 if (byte_size < 4)
3853 byte_size = 4;
3854 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3855 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3856 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3857 const0_rtx);
3858 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3860 else
3861 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3864 static void
3865 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3866 const_tree type, bool named ATTRIBUTE_UNUSED)
3868 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3870 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3871 ? 1
3872 : mode == BLKmode
3873 ? ((int_size_in_bytes (type) + 15) / 16)
3874 : mode == VOIDmode
3875 ? 1
3876 : HARD_REGNO_NREGS (cum, mode));
3879 /* Variable sized types are passed by reference. */
3880 static bool
3881 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3882 machine_mode mode ATTRIBUTE_UNUSED,
3883 const_tree type, bool named ATTRIBUTE_UNUSED)
3885 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3889 /* Var args. */
3891 /* Create and return the va_list datatype.
3893 On SPU, va_list is an array type equivalent to
3895 typedef struct __va_list_tag
3897 void *__args __attribute__((__aligned(16)));
3898 void *__skip __attribute__((__aligned(16)));
3900 } va_list[1];
3902 where __args points to the arg that will be returned by the next
3903 va_arg(), and __skip points to the previous stack frame such that
3904 when __args == __skip we should advance __args by 32 bytes. */
3905 static tree
3906 spu_build_builtin_va_list (void)
3908 tree f_args, f_skip, record, type_decl;
3909 bool owp;
3911 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3913 type_decl =
3914 build_decl (BUILTINS_LOCATION,
3915 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3917 f_args = build_decl (BUILTINS_LOCATION,
3918 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3919 f_skip = build_decl (BUILTINS_LOCATION,
3920 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3922 DECL_FIELD_CONTEXT (f_args) = record;
3923 SET_DECL_ALIGN (f_args, 128);
3924 DECL_USER_ALIGN (f_args) = 1;
3926 DECL_FIELD_CONTEXT (f_skip) = record;
3927 SET_DECL_ALIGN (f_skip, 128);
3928 DECL_USER_ALIGN (f_skip) = 1;
3930 TYPE_STUB_DECL (record) = type_decl;
3931 TYPE_NAME (record) = type_decl;
3932 TYPE_FIELDS (record) = f_args;
3933 DECL_CHAIN (f_args) = f_skip;
3935 /* We know this is being padded and we want it that way. It is an
3936 internal type, so hide the warnings from the user. */
3937 owp = warn_padded;
3938 warn_padded = false;
3940 layout_type (record);
3942 warn_padded = owp;
3944 /* The correct type is an array type of one element. */
3945 return build_array_type (record, build_index_type (size_zero_node));
3948 /* Implement va_start by filling the va_list structure VALIST.
3949 NEXTARG points to the first anonymous stack argument.
3951 The following global variables are used to initialize
3952 the va_list structure:
3954 crtl->args.info;
3955 the CUMULATIVE_ARGS for this function
3957 crtl->args.arg_offset_rtx:
3958 holds the offset of the first anonymous stack argument
3959 (relative to the virtual arg pointer). */
3961 static void
3962 spu_va_start (tree valist, rtx nextarg)
3964 tree f_args, f_skip;
3965 tree args, skip, t;
3967 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3968 f_skip = DECL_CHAIN (f_args);
3970 valist = build_simple_mem_ref (valist);
3971 args =
3972 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3973 skip =
3974 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3976 /* Find the __args area. */
3977 t = make_tree (TREE_TYPE (args), nextarg);
3978 if (crtl->args.pretend_args_size > 0)
3979 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3980 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3981 TREE_SIDE_EFFECTS (t) = 1;
3982 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3984 /* Find the __skip area. */
3985 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3986 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
3987 - STACK_POINTER_OFFSET));
3988 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
3989 TREE_SIDE_EFFECTS (t) = 1;
3990 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3993 /* Gimplify va_arg by updating the va_list structure
3994 VALIST as required to retrieve an argument of type
3995 TYPE, and returning that argument.
3997 ret = va_arg(VALIST, TYPE);
3999 generates code equivalent to:
4001 paddedsize = (sizeof(TYPE) + 15) & -16;
4002 if (VALIST.__args + paddedsize > VALIST.__skip
4003 && VALIST.__args <= VALIST.__skip)
4004 addr = VALIST.__skip + 32;
4005 else
4006 addr = VALIST.__args;
4007 VALIST.__args = addr + paddedsize;
4008 ret = *(TYPE *)addr;  */
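/* Worked example (added, not part of the original source): a
   hypothetical C rendering of the sequence above for a 4-byte 'int'
   argument.  The struct and function names below are made up purely
   for illustration; the compiler builds trees, not this source.  */
#if 0
struct example_va_list { char *__args; char *__skip; };

static int
example_va_arg_int (struct example_va_list *vl)
{
  int paddedsize = (sizeof (int) + 15) & -16;	/* == 16 */
  char *addr;
  if (vl->__args + paddedsize > vl->__skip
      && vl->__args <= vl->__skip)
    addr = vl->__skip + 32;	/* step over the 32-byte skip region */
  else
    addr = vl->__args;
  vl->__args = addr + paddedsize;
  return *(int *) addr;
}
#endif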
4010 static tree
4011 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4012 gimple_seq * post_p ATTRIBUTE_UNUSED)
4014 tree f_args, f_skip;
4015 tree args, skip;
4016 HOST_WIDE_INT size, rsize;
4017 tree addr, tmp;
4018 bool pass_by_reference_p;
4020 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4021 f_skip = DECL_CHAIN (f_args);
4023 args =
4024 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4025 skip =
4026 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4028 addr = create_tmp_var (ptr_type_node, "va_arg");
4030 /* if an object is dynamically sized, a pointer to it is passed
4031 instead of the object itself. */
4032 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4033 false);
4034 if (pass_by_reference_p)
4035 type = build_pointer_type (type);
4036 size = int_size_in_bytes (type);
4037 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4039 /* build conditional expression to calculate addr. The expression
4040 will be gimplified later. */
4041 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4042 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4043 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4044 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4045 unshare_expr (skip)));
4047 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4048 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4049 unshare_expr (args));
4051 gimplify_assign (addr, tmp, pre_p);
4053 /* update VALIST.__args */
4054 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4055 gimplify_assign (unshare_expr (args), tmp, pre_p);
4057 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4058 addr);
4060 if (pass_by_reference_p)
4061 addr = build_va_arg_indirect_ref (addr);
4063 return build_va_arg_indirect_ref (addr);
4066 /* Save parameter registers starting with the register that corresponds
4067 to the first unnamed parameter. If the first unnamed parameter is
4068 in the stack then save no registers. Set pretend_args_size to the
4069 amount of space needed to save the registers. */
4070 static void
4071 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4072 tree type, int *pretend_size, int no_rtl)
4074 if (!no_rtl)
4076 rtx tmp;
4077 int regno;
4078 int offset;
4079 int ncum = *get_cumulative_args (cum);
4081 /* CUM currently points to the last named argument; we want to
4082 start at the next argument. */
4083 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4085 offset = -STACK_POINTER_OFFSET;
4086 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4088 tmp = gen_frame_mem (V4SImode,
4089 plus_constant (Pmode, virtual_incoming_args_rtx,
4090 offset));
4091 emit_move_insn (tmp,
4092 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4093 offset += 16;
4095 *pretend_size = offset + STACK_POINTER_OFFSET;
4099 static void
4100 spu_conditional_register_usage (void)
4102 if (flag_pic)
4104 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4105 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4109 /* This is called any time we inspect the alignment of a register for
4110 addresses. */
4111 static int
4112 reg_aligned_for_addr (rtx x)
4114 int regno =
4115 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4116 return REGNO_POINTER_ALIGN (regno) >= 128;
4119 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4120 into its SYMBOL_REF_FLAGS. */
4121 static void
4122 spu_encode_section_info (tree decl, rtx rtl, int first)
4124 default_encode_section_info (decl, rtl, first);
4126 /* If a variable has a forced alignment to < 16 bytes, mark it with
4127 SYMBOL_FLAG_ALIGN1. */
4128 if (TREE_CODE (decl) == VAR_DECL
4129 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4130 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4133 /* Return TRUE if we are certain the mem refers to a complete object
4134 which is both 16-byte aligned and padded to a 16-byte boundary. This
4135 would make it safe to store with a single instruction.
4136 We guarantee the alignment and padding for static objects by aligning
4137 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4138 FIXME: We currently cannot guarantee this for objects on the stack
4139 because assign_parm_setup_stack calls assign_stack_local with the
4140 alignment of the parameter mode and in that case the alignment never
4141 gets adjusted by LOCAL_ALIGNMENT. */
4142 static int
4143 store_with_one_insn_p (rtx mem)
4145 machine_mode mode = GET_MODE (mem);
4146 rtx addr = XEXP (mem, 0);
4147 if (mode == BLKmode)
4148 return 0;
4149 if (GET_MODE_SIZE (mode) >= 16)
4150 return 1;
4151 /* Only static objects. */
4152 if (GET_CODE (addr) == SYMBOL_REF)
4154 /* We use the associated declaration to make sure the access is
4155 referring to the whole object.
4156 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4157 if it is necessary. Will there be cases where one exists, and
4158 the other does not? Will there be cases where both exist, but
4159 have different types? */
4160 tree decl = MEM_EXPR (mem);
4161 if (decl
4162 && TREE_CODE (decl) == VAR_DECL
4163 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4164 return 1;
4165 decl = SYMBOL_REF_DECL (addr);
4166 if (decl
4167 && TREE_CODE (decl) == VAR_DECL
4168 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4169 return 1;
4171 return 0;
4174 /* Return 1 when the address is not valid for a simple load and store as
4175 required by the '_mov*' patterns. We could make this less strict
4176 for loads, but we prefer MEMs to look the same so they are more
4177 likely to be merged. */
4178 static int
4179 address_needs_split (rtx mem)
4181 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4182 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4183 || !(store_with_one_insn_p (mem)
4184 || mem_is_padded_component_ref (mem))))
4185 return 1;
4187 return 0;
4190 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4191 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4192 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4194 /* MEM is known to be an __ea qualified memory access. Emit a call to
4195 fetch the ppu memory to local store, and return its address in local
4196 store. */
4198 static void
4199 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4201 if (is_store)
4203 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4204 if (!cache_fetch_dirty)
4205 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4206 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4207 2, ea_addr, EAmode, ndirty, SImode);
4209 else
4211 if (!cache_fetch)
4212 cache_fetch = init_one_libfunc ("__cache_fetch");
4213 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4214 1, ea_addr, EAmode);
4218 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4219 dirty bit marking, inline.
4221 The cache control data structure is an array of
4223 struct __cache_tag_array
4225 unsigned int tag_lo[4];
4226 unsigned int tag_hi[4];
4227 void *data_pointer[4];
4228 int reserved[4];
4229 vector unsigned short dirty_bits[4];
4230 } */
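/* Very roughly, the expansion below is equivalent to the following
   scalar pseudo code (a hedged sketch, not from the original sources;
   the generated code checks all four ways at once with vector
   instructions):

     set = (struct __cache_tag_array *)
           ((char *) &__cache_tag_array
            + (ea_addr & (__cache_tag_array_size - 128)));
     tag = ea_addr & -128;
     if (for some way w, set->tag_lo[w] matches the low 32 bits of tag,
         and, for 64-bit __ea, set->tag_hi[w] matches the high bits)
       data_addr = (char *) set->data_pointer[w] + (ea_addr & 127);
     else
       data_addr = __cache_fetch (ea_addr);
       (__cache_fetch_dirty is used instead for stores)  */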
4232 static void
4233 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4235 rtx ea_addr_si;
4236 HOST_WIDE_INT v;
4237 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4238 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4239 rtx index_mask = gen_reg_rtx (SImode);
4240 rtx tag_arr = gen_reg_rtx (Pmode);
4241 rtx splat_mask = gen_reg_rtx (TImode);
4242 rtx splat = gen_reg_rtx (V4SImode);
4243 rtx splat_hi = NULL_RTX;
4244 rtx tag_index = gen_reg_rtx (Pmode);
4245 rtx block_off = gen_reg_rtx (SImode);
4246 rtx tag_addr = gen_reg_rtx (Pmode);
4247 rtx tag = gen_reg_rtx (V4SImode);
4248 rtx cache_tag = gen_reg_rtx (V4SImode);
4249 rtx cache_tag_hi = NULL_RTX;
4250 rtx cache_ptrs = gen_reg_rtx (TImode);
4251 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4252 rtx tag_equal = gen_reg_rtx (V4SImode);
4253 rtx tag_equal_hi = NULL_RTX;
4254 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4255 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4256 rtx eq_index = gen_reg_rtx (SImode);
4257 rtx bcomp, hit_label, hit_ref, cont_label;
4258 rtx_insn *insn;
4260 if (spu_ea_model != 32)
4262 splat_hi = gen_reg_rtx (V4SImode);
4263 cache_tag_hi = gen_reg_rtx (V4SImode);
4264 tag_equal_hi = gen_reg_rtx (V4SImode);
4267 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4268 emit_move_insn (tag_arr, tag_arr_sym);
4269 v = 0x0001020300010203LL;
4270 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4271 ea_addr_si = ea_addr;
4272 if (spu_ea_model != 32)
4273 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4275 /* tag_index = ea_addr & (tag_array_size - 128) */
4276 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4278 /* splat ea_addr to all 4 slots. */
4279 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4280 /* Similarly for high 32 bits of ea_addr. */
4281 if (spu_ea_model != 32)
4282 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4284 /* block_off = ea_addr & 127 */
4285 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4287 /* tag_addr = tag_arr + tag_index */
4288 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4290 /* Read cache tags. */
4291 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4292 if (spu_ea_model != 32)
4293 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4294 plus_constant (Pmode,
4295 tag_addr, 16)));
4297 /* tag = ea_addr & -128 */
4298 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4300 /* Read all four cache data pointers. */
4301 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4302 plus_constant (Pmode,
4303 tag_addr, 32)));
4305 /* Compare tags. */
4306 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4307 if (spu_ea_model != 32)
4309 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4310 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4313 /* At most one of the tags compares equal, so tag_equal has one
4314 32-bit slot set to all 1's, with the other slots all zero.
4315 gbb picks off low bit from each byte in the 128-bit registers,
4316 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4317 we have a hit. */
4318 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4319 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4321 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4322 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4324 /* This lets us rotate the corresponding cache data pointer into slot 0
4325 (rotating by eq_index mod 16 bytes). */
4326 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4327 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4329 /* Add block offset to form final data address. */
4330 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4332 /* Check that we did hit. */
4333 hit_label = gen_label_rtx ();
4334 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4335 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4336 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4337 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4338 hit_ref, pc_rtx)));
4339 /* Say that this branch is very likely to happen. */
4340 add_reg_br_prob_note (insn, profile_probability::very_likely ());
4342 ea_load_store (mem, is_store, ea_addr, data_addr);
4343 cont_label = gen_label_rtx ();
4344 emit_jump_insn (gen_jump (cont_label));
4345 emit_barrier ();
4347 emit_label (hit_label);
4349 if (is_store)
4351 HOST_WIDE_INT v_hi;
4352 rtx dirty_bits = gen_reg_rtx (TImode);
4353 rtx dirty_off = gen_reg_rtx (SImode);
4354 rtx dirty_128 = gen_reg_rtx (TImode);
4355 rtx neg_block_off = gen_reg_rtx (SImode);
4357 /* Set up mask with one dirty bit per byte of the mem we are
4358 writing, starting from top bit. */
4359 v_hi = v = -1;
4360 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4361 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4363 v_hi = v;
4364 v = 0;
4366 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
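/* For example, an SImode (4 byte) store produces a TImode mask with
   just the top four bits set: one dirty bit for each byte written.  */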
4368 /* Form index into cache dirty_bits. eq_index is one of
4369 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4370 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4371 offset to each of the four dirty_bits elements. */
4372 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4374 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4376 /* Rotate bit mask to proper bit. */
4377 emit_insn (gen_negsi2 (neg_block_off, block_off));
4378 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4379 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4381 /* Or in the new dirty bits. */
4382 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4384 /* Store. */
4385 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4388 emit_label (cont_label);
4391 static rtx
4392 expand_ea_mem (rtx mem, bool is_store)
4394 rtx ea_addr;
4395 rtx data_addr = gen_reg_rtx (Pmode);
4396 rtx new_mem;
4398 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4399 if (optimize_size || optimize == 0)
4400 ea_load_store (mem, is_store, ea_addr, data_addr);
4401 else
4402 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4404 if (ea_alias_set == -1)
4405 ea_alias_set = new_alias_set ();
4407 /* We generate a new MEM RTX to refer to the copy of the data
4408 in the cache. We do not copy memory attributes (except the
4409 alignment) from the original MEM, as they may no longer apply
4410 to the cache copy. */
4411 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4412 set_mem_alias_set (new_mem, ea_alias_set);
4413 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4415 return new_mem;
4419 spu_expand_mov (rtx * ops, machine_mode mode)
4421 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4423 /* Perform the move in the destination SUBREG's inner mode. */
4424 ops[0] = SUBREG_REG (ops[0]);
4425 mode = GET_MODE (ops[0]);
4426 ops[1] = gen_lowpart_common (mode, ops[1]);
4427 gcc_assert (ops[1]);
4430 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4432 rtx from = SUBREG_REG (ops[1]);
4433 machine_mode imode = int_mode_for_mode (GET_MODE (from));
4435 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4436 && GET_MODE_CLASS (imode) == MODE_INT
4437 && subreg_lowpart_p (ops[1]));
4439 if (GET_MODE_SIZE (imode) < 4)
4440 imode = SImode;
4441 if (imode != GET_MODE (from))
4442 from = gen_rtx_SUBREG (imode, from, 0);
4444 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4446 enum insn_code icode = convert_optab_handler (trunc_optab,
4447 mode, imode);
4448 emit_insn (GEN_FCN (icode) (ops[0], from));
4450 else
4451 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4452 return 1;
4455 /* At least one of the operands needs to be a register. */
4456 if ((reload_in_progress | reload_completed) == 0
4457 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4459 rtx temp = force_reg (mode, ops[1]);
4460 emit_move_insn (ops[0], temp);
4461 return 1;
4463 if (reload_in_progress || reload_completed)
4465 if (CONSTANT_P (ops[1]))
4466 return spu_split_immediate (ops);
4467 return 0;
4470 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4471 extend them. */
4472 if (GET_CODE (ops[1]) == CONST_INT)
4474 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4475 if (val != INTVAL (ops[1]))
4477 emit_move_insn (ops[0], GEN_INT (val));
4478 return 1;
4481 if (MEM_P (ops[0]))
4483 if (MEM_ADDR_SPACE (ops[0]))
4484 ops[0] = expand_ea_mem (ops[0], true);
4485 return spu_split_store (ops);
4487 if (MEM_P (ops[1]))
4489 if (MEM_ADDR_SPACE (ops[1]))
4490 ops[1] = expand_ea_mem (ops[1], false);
4491 return spu_split_load (ops);
4494 return 0;
4497 static void
4498 spu_convert_move (rtx dst, rtx src)
4500 machine_mode mode = GET_MODE (dst);
4501 machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4502 rtx reg;
4503 gcc_assert (GET_MODE (src) == TImode);
4504 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4505 emit_insn (gen_rtx_SET (reg,
4506 gen_rtx_TRUNCATE (int_mode,
4507 gen_rtx_LSHIFTRT (TImode, src,
4508 GEN_INT (int_mode == DImode ? 64 : 96)))));
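/* The value sits in the preferred slot of the TImode source, so the
   logical right shift above (by 64 bits for DImode, 96 otherwise)
   moves it into the low bits before the truncation.  */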
4509 if (int_mode != mode)
4511 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4512 emit_move_insn (dst, reg);
4516 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4517 the address from SRC and SRC+16. Return a REG or CONST_INT that
4518 specifies how many bytes to rotate the loaded registers, plus any
4519 extra from EXTRA_ROTQBY. The address and rotate amounts are
4520 normalized to improve merging of loads and rotate computations. */
4521 static rtx
4522 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4524 rtx addr = XEXP (src, 0);
4525 rtx p0, p1, rot, addr0, addr1;
4526 int rot_amt;
4528 rot = 0;
4529 rot_amt = 0;
4531 if (MEM_ALIGN (src) >= 128)
4532 /* Address is already aligned; simply perform a TImode load. */ ;
4533 else if (GET_CODE (addr) == PLUS)
4535 /* 8 cases:
4536 aligned reg + aligned reg => lqx
4537 aligned reg + unaligned reg => lqx, rotqby
4538 aligned reg + aligned const => lqd
4539 aligned reg + unaligned const => lqd, rotqbyi
4540 unaligned reg + aligned reg => lqx, rotqby
4541 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4542 unaligned reg + aligned const => lqd, rotqby
4543 unaligned reg + unaligned const => not allowed by legitimate address
4545 p0 = XEXP (addr, 0);
4546 p1 = XEXP (addr, 1);
4547 if (!reg_aligned_for_addr (p0))
4549 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4551 rot = gen_reg_rtx (SImode);
4552 emit_insn (gen_addsi3 (rot, p0, p1));
4554 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4556 if (INTVAL (p1) > 0
4557 && REG_POINTER (p0)
4558 && INTVAL (p1) * BITS_PER_UNIT
4559 < REGNO_POINTER_ALIGN (REGNO (p0)))
4561 rot = gen_reg_rtx (SImode);
4562 emit_insn (gen_addsi3 (rot, p0, p1));
4563 addr = p0;
4565 else
4567 rtx x = gen_reg_rtx (SImode);
4568 emit_move_insn (x, p1);
4569 if (!spu_arith_operand (p1, SImode))
4570 p1 = x;
4571 rot = gen_reg_rtx (SImode);
4572 emit_insn (gen_addsi3 (rot, p0, p1));
4573 addr = gen_rtx_PLUS (Pmode, p0, x);
4576 else
4577 rot = p0;
4579 else
4581 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4583 rot_amt = INTVAL (p1) & 15;
4584 if (INTVAL (p1) & -16)
4586 p1 = GEN_INT (INTVAL (p1) & -16);
4587 addr = gen_rtx_PLUS (SImode, p0, p1);
4589 else
4590 addr = p0;
4592 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4593 rot = p1;
4596 else if (REG_P (addr))
4598 if (!reg_aligned_for_addr (addr))
4599 rot = addr;
4601 else if (GET_CODE (addr) == CONST)
4603 if (GET_CODE (XEXP (addr, 0)) == PLUS
4604 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4605 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4607 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4608 if (rot_amt & -16)
4609 addr = gen_rtx_CONST (Pmode,
4610 gen_rtx_PLUS (Pmode,
4611 XEXP (XEXP (addr, 0), 0),
4612 GEN_INT (rot_amt & -16)));
4613 else
4614 addr = XEXP (XEXP (addr, 0), 0);
4616 else
4618 rot = gen_reg_rtx (Pmode);
4619 emit_move_insn (rot, addr);
4622 else if (GET_CODE (addr) == CONST_INT)
4624 rot_amt = INTVAL (addr);
4625 addr = GEN_INT (rot_amt & -16);
4627 else if (!ALIGNED_SYMBOL_REF_P (addr))
4629 rot = gen_reg_rtx (Pmode);
4630 emit_move_insn (rot, addr);
4633 rot_amt += extra_rotby;
4635 rot_amt &= 15;
4637 if (rot && rot_amt)
4639 rtx x = gen_reg_rtx (SImode);
4640 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4641 rot = x;
4642 rot_amt = 0;
4644 if (!rot && rot_amt)
4645 rot = GEN_INT (rot_amt);
4647 addr0 = copy_rtx (addr);
4648 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4649 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4651 if (dst1)
4653 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4654 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4655 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4658 return rot;
4662 spu_split_load (rtx * ops)
4664 machine_mode mode = GET_MODE (ops[0]);
4665 rtx addr, load, rot;
4666 int rot_amt;
4668 if (GET_MODE_SIZE (mode) >= 16)
4669 return 0;
4671 addr = XEXP (ops[1], 0);
4672 gcc_assert (GET_CODE (addr) != AND);
4674 if (!address_needs_split (ops[1]))
4676 ops[1] = change_address (ops[1], TImode, addr);
4677 load = gen_reg_rtx (TImode);
4678 emit_insn (gen__movti (load, ops[1]));
4679 spu_convert_move (ops[0], load);
4680 return 1;
4683 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
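/* Scalars narrower than a word must end up right-justified within the
   preferred slot, so request an extra rotate of SIZE - 4 bytes; the
   amount is reduced modulo 16 inside spu_expand_load.  */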
4685 load = gen_reg_rtx (TImode);
4686 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4688 if (rot)
4689 emit_insn (gen_rotqby_ti (load, load, rot));
4691 spu_convert_move (ops[0], load);
4692 return 1;
4696 spu_split_store (rtx * ops)
4698 machine_mode mode = GET_MODE (ops[0]);
4699 rtx reg;
4700 rtx addr, p0, p1, p1_lo, smem;
4701 int aform;
4702 int scalar;
4704 if (GET_MODE_SIZE (mode) >= 16)
4705 return 0;
4707 addr = XEXP (ops[0], 0);
4708 gcc_assert (GET_CODE (addr) != AND);
4710 if (!address_needs_split (ops[0]))
4712 reg = gen_reg_rtx (TImode);
4713 emit_insn (gen_spu_convert (reg, ops[1]));
4714 ops[0] = change_address (ops[0], TImode, addr);
4715 emit_move_insn (ops[0], reg);
4716 return 1;
4719 if (GET_CODE (addr) == PLUS)
4721 /* 8 cases:
4722 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4723 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4724 aligned reg + aligned const => lqd, c?d, shuf, stqx
4725 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4726 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4727 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4728 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4729 unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4731 aform = 0;
4732 p0 = XEXP (addr, 0);
4733 p1 = p1_lo = XEXP (addr, 1);
4734 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4736 p1_lo = GEN_INT (INTVAL (p1) & 15);
4737 if (reg_aligned_for_addr (p0))
4739 p1 = GEN_INT (INTVAL (p1) & -16);
4740 if (p1 == const0_rtx)
4741 addr = p0;
4742 else
4743 addr = gen_rtx_PLUS (SImode, p0, p1);
4745 else
4747 rtx x = gen_reg_rtx (SImode);
4748 emit_move_insn (x, p1);
4749 addr = gen_rtx_PLUS (SImode, p0, x);
4753 else if (REG_P (addr))
4755 aform = 0;
4756 p0 = addr;
4757 p1 = p1_lo = const0_rtx;
4759 else
4761 aform = 1;
4762 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4763 p1 = 0; /* aform doesn't use p1 */
4764 p1_lo = addr;
4765 if (ALIGNED_SYMBOL_REF_P (addr))
4766 p1_lo = const0_rtx;
4767 else if (GET_CODE (addr) == CONST
4768 && GET_CODE (XEXP (addr, 0)) == PLUS
4769 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4770 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4772 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4773 if ((v & -16) != 0)
4774 addr = gen_rtx_CONST (Pmode,
4775 gen_rtx_PLUS (Pmode,
4776 XEXP (XEXP (addr, 0), 0),
4777 GEN_INT (v & -16)));
4778 else
4779 addr = XEXP (XEXP (addr, 0), 0);
4780 p1_lo = GEN_INT (v & 15);
4782 else if (GET_CODE (addr) == CONST_INT)
4784 p1_lo = GEN_INT (INTVAL (addr) & 15);
4785 addr = GEN_INT (INTVAL (addr) & -16);
4787 else
4789 p1_lo = gen_reg_rtx (SImode);
4790 emit_move_insn (p1_lo, addr);
4794 gcc_assert (aform == 0 || aform == 1);
4795 reg = gen_reg_rtx (TImode);
4797 scalar = store_with_one_insn_p (ops[0]);
4798 if (!scalar)
4800 /* We could copy the flags from the ops[0] MEM to the load below,
4801 but we don't because we want this load to be optimized away if
4802 possible, and copying the flags would prevent that in certain
4803 cases, e.g. consider the volatile flag. */
4805 rtx pat = gen_reg_rtx (TImode);
4806 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4807 set_mem_alias_set (lmem, 0);
4808 emit_insn (gen_movti (reg, lmem));
4810 if (!p0 || reg_aligned_for_addr (p0))
4811 p0 = stack_pointer_rtx;
4812 if (!p1_lo)
4813 p1_lo = const0_rtx;
4815 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4816 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4818 else
4820 if (GET_CODE (ops[1]) == REG)
4821 emit_insn (gen_spu_convert (reg, ops[1]));
4822 else if (GET_CODE (ops[1]) == SUBREG)
4823 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4824 else
4825 abort ();
4828 if (GET_MODE_SIZE (mode) < 4 && scalar)
4829 emit_insn (gen_ashlti3
4830 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
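/* The scalar was right-justified in the preferred slot; the shift
   above moves it to byte 0 of the quadword, which is where a 16-byte
   aligned, padded sub-word object lives in memory.  */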
4832 smem = change_address (ops[0], TImode, copy_rtx (addr));
4833 /* We can't use the previous alias set because the memory has changed
4834 size and can potentially overlap objects of other types. */
4835 set_mem_alias_set (smem, 0);
4837 emit_insn (gen_movti (smem, reg));
4838 return 1;
4841 /* Return TRUE if X is MEM which is a struct member reference
4842 and the member can safely be loaded and stored with a single
4843 instruction because it is padded. */
4844 static int
4845 mem_is_padded_component_ref (rtx x)
4847 tree t = MEM_EXPR (x);
4848 tree r;
4849 if (!t || TREE_CODE (t) != COMPONENT_REF)
4850 return 0;
4851 t = TREE_OPERAND (t, 1);
4852 if (!t || TREE_CODE (t) != FIELD_DECL
4853 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4854 return 0;
4855 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4856 r = DECL_FIELD_CONTEXT (t);
4857 if (!r || TREE_CODE (r) != RECORD_TYPE)
4858 return 0;
4859 /* Make sure they are the same mode */
4860 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4861 return 0;
4862 /* If there are no following fields then the field alignment assures
4863 the structure is padded to the alignment which means this field is
4864 padded too. */
4865 if (TREE_CHAIN (t) == 0)
4866 return 1;
4867 /* If the following field is also aligned then this field will be
4868 padded. */
4869 t = TREE_CHAIN (t);
4870 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4871 return 1;
4872 return 0;
4875 /* Parse the -mfixed-range= option string. */
4876 static void
4877 fix_range (const char *const_str)
4879 int i, first, last;
4880 char *str, *dash, *comma;
4882 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4883 REG2 are either register names or register numbers. The effect
4884 of this option is to mark the registers in the range from REG1 to
4885 REG2 as ``fixed'' so they won't be used by the compiler. */
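/* For example (an illustrative value only, not from the original
   sources), "-mfixed-range=80-85,90-97" would mark registers 80-85
   and 90-97 as fixed and call-used.  */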
4887 i = strlen (const_str);
4888 str = (char *) alloca (i + 1);
4889 memcpy (str, const_str, i + 1);
4891 while (1)
4893 dash = strchr (str, '-');
4894 if (!dash)
4896 warning (0, "value of -mfixed-range must have form REG1-REG2");
4897 return;
4899 *dash = '\0';
4900 comma = strchr (dash + 1, ',');
4901 if (comma)
4902 *comma = '\0';
4904 first = decode_reg_name (str);
4905 if (first < 0)
4907 warning (0, "unknown register name: %s", str);
4908 return;
4911 last = decode_reg_name (dash + 1);
4912 if (last < 0)
4914 warning (0, "unknown register name: %s", dash + 1);
4915 return;
4918 *dash = '-';
4920 if (first > last)
4922 warning (0, "%s-%s is an empty range", str, dash + 1);
4923 return;
4926 for (i = first; i <= last; ++i)
4927 fixed_regs[i] = call_used_regs[i] = 1;
4929 if (!comma)
4930 break;
4932 *comma = ',';
4933 str = comma + 1;
4937 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4938 can be generated using the fsmbi instruction. */
4940 fsmbi_const_p (rtx x)
4942 if (CONSTANT_P (x))
4944 /* We can always choose TImode for CONST_INT because the high bits
4945 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4946 enum immediate_class c = classify_immediate (x, TImode);
4947 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4949 return 0;
4952 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4953 can be generated using the cbd, chd, cwd or cdd instruction. */
4955 cpat_const_p (rtx x, machine_mode mode)
4957 if (CONSTANT_P (x))
4959 enum immediate_class c = classify_immediate (x, mode);
4960 return c == IC_CPAT;
4962 return 0;
4966 gen_cpat_const (rtx * ops)
4968 unsigned char dst[16];
4969 int i, offset, shift, isize;
4970 if (GET_CODE (ops[3]) != CONST_INT
4971 || GET_CODE (ops[2]) != CONST_INT
4972 || (GET_CODE (ops[1]) != CONST_INT
4973 && GET_CODE (ops[1]) != REG))
4974 return 0;
4975 if (GET_CODE (ops[1]) == REG
4976 && (!REG_POINTER (ops[1])
4977 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4978 return 0;
4980 for (i = 0; i < 16; i++)
4981 dst[i] = i + 16;
4982 isize = INTVAL (ops[3]);
4983 if (isize == 1)
4984 shift = 3;
4985 else if (isize == 2)
4986 shift = 2;
4987 else
4988 shift = 0;
4989 offset = (INTVAL (ops[2]) +
4990 (GET_CODE (ops[1]) ==
4991 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4992 for (i = 0; i < isize; i++)
4993 dst[offset + i] = i + shift;
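/* For example (illustrative only), a 4 byte insert at offset 4 with an
   aligned base (ops[1] == 0, ops[2] == 4, ops[3] == 4) yields the
   pattern { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19,... }:
   bytes 4-7 come from the preferred slot of the new value, everything
   else from the old quadword.  */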
4994 return array_to_constant (TImode, dst);
4997 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4998 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4999 than 16 bytes, the value is repeated across the rest of the array. */
5000 void
5001 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5003 HOST_WIDE_INT val;
5004 int i, j, first;
5006 memset (arr, 0, 16);
5007 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5008 if (GET_CODE (x) == CONST_INT
5009 || (GET_CODE (x) == CONST_DOUBLE
5010 && (mode == SFmode || mode == DFmode)))
5012 gcc_assert (mode != VOIDmode && mode != BLKmode);
5014 if (GET_CODE (x) == CONST_DOUBLE)
5015 val = const_double_to_hwint (x);
5016 else
5017 val = INTVAL (x);
5018 first = GET_MODE_SIZE (mode) - 1;
5019 for (i = first; i >= 0; i--)
5021 arr[i] = val & 0xff;
5022 val >>= 8;
5024 /* Splat the constant across the whole array. */
5025 for (j = 0, i = first + 1; i < 16; i++)
5027 arr[i] = arr[j];
5028 j = (j == first) ? 0 : j + 1;
5031 else if (GET_CODE (x) == CONST_DOUBLE)
5033 val = CONST_DOUBLE_LOW (x);
5034 for (i = 15; i >= 8; i--)
5036 arr[i] = val & 0xff;
5037 val >>= 8;
5039 val = CONST_DOUBLE_HIGH (x);
5040 for (i = 7; i >= 0; i--)
5042 arr[i] = val & 0xff;
5043 val >>= 8;
5046 else if (GET_CODE (x) == CONST_VECTOR)
5048 int units;
5049 rtx elt;
5050 mode = GET_MODE_INNER (mode);
5051 units = CONST_VECTOR_NUNITS (x);
5052 for (i = 0; i < units; i++)
5054 elt = CONST_VECTOR_ELT (x, i);
5055 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5057 if (GET_CODE (elt) == CONST_DOUBLE)
5058 val = const_double_to_hwint (elt);
5059 else
5060 val = INTVAL (elt);
5061 first = GET_MODE_SIZE (mode) - 1;
5062 if (first + i * GET_MODE_SIZE (mode) > 16)
5063 abort ();
5064 for (j = first; j >= 0; j--)
5066 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5067 val >>= 8;
5072 else
5073 gcc_unreachable();
5076 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5077 smaller than 16 bytes, use the bytes that would represent that value
5078 in a register, e.g., for QImode return the value of arr[3]. */
5080 array_to_constant (machine_mode mode, const unsigned char arr[16])
5082 machine_mode inner_mode;
5083 rtvec v;
5084 int units, size, i, j, k;
5085 HOST_WIDE_INT val;
5087 if (GET_MODE_CLASS (mode) == MODE_INT
5088 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5090 j = GET_MODE_SIZE (mode);
5091 i = j < 4 ? 4 - j : 0;
5092 for (val = 0; i < j; i++)
5093 val = (val << 8) | arr[i];
5094 val = trunc_int_for_mode (val, mode);
5095 return GEN_INT (val);
5098 if (mode == TImode)
5100 HOST_WIDE_INT high;
5101 for (i = high = 0; i < 8; i++)
5102 high = (high << 8) | arr[i];
5103 for (i = 8, val = 0; i < 16; i++)
5104 val = (val << 8) | arr[i];
5105 return immed_double_const (val, high, TImode);
5107 if (mode == SFmode)
5109 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5110 val = trunc_int_for_mode (val, SImode);
5111 return hwint_to_const_double (SFmode, val);
5113 if (mode == DFmode)
5115 for (i = 0, val = 0; i < 8; i++)
5116 val = (val << 8) | arr[i];
5117 return hwint_to_const_double (DFmode, val);
5120 if (!VECTOR_MODE_P (mode))
5121 abort ();
5123 units = GET_MODE_NUNITS (mode);
5124 size = GET_MODE_UNIT_SIZE (mode);
5125 inner_mode = GET_MODE_INNER (mode);
5126 v = rtvec_alloc (units);
5128 for (k = i = 0; i < units; ++i)
5130 val = 0;
5131 for (j = 0; j < size; j++, k++)
5132 val = (val << 8) | arr[k];
5134 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5135 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5136 else
5137 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5139 if (k > 16)
5140 abort ();
5142 return gen_rtx_CONST_VECTOR (mode, v);
5145 static void
5146 reloc_diagnostic (rtx x)
5148 tree decl = 0;
5149 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5150 return;
5152 if (GET_CODE (x) == SYMBOL_REF)
5153 decl = SYMBOL_REF_DECL (x);
5154 else if (GET_CODE (x) == CONST
5155 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5156 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5158 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5159 if (decl && !DECL_P (decl))
5160 decl = 0;
5162 /* The decl could be a string constant. */
5163 if (decl && DECL_P (decl))
5165 location_t loc;
5166 /* We use last_assemble_variable_decl to get line information. It's
5167 not always going to be right and might not even be close, but will
5168 be right for the more common cases. */
5169 if (!last_assemble_variable_decl || in_section == ctors_section)
5170 loc = DECL_SOURCE_LOCATION (decl);
5171 else
5172 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5174 if (TARGET_WARN_RELOC)
5175 warning_at (loc, 0,
5176 "creating run-time relocation for %qD", decl);
5177 else
5178 error_at (loc,
5179 "creating run-time relocation for %qD", decl);
5181 else
5183 if (TARGET_WARN_RELOC)
5184 warning_at (input_location, 0, "creating run-time relocation");
5185 else
5186 error_at (input_location, "creating run-time relocation");
5190 /* Hook into assemble_integer so we can generate an error for run-time
5191 relocations. The SPU ABI disallows them. */
5192 static bool
5193 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5195 /* By default run-time relocations aren't supported, but we allow them
5196 in case users support it in their own run-time loader. And we provide
5197 a warning for those users that don't. */
5198 if ((GET_CODE (x) == SYMBOL_REF)
5199 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5200 reloc_diagnostic (x);
5202 return default_assemble_integer (x, size, aligned_p);
5205 static void
5206 spu_asm_globalize_label (FILE * file, const char *name)
5208 fputs ("\t.global\t", file);
5209 assemble_name (file, name);
5210 fputs ("\n", file);
5213 static bool
5214 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5215 int opno ATTRIBUTE_UNUSED, int *total,
5216 bool speed ATTRIBUTE_UNUSED)
5218 int code = GET_CODE (x);
5219 int cost = COSTS_N_INSNS (2);
5221 /* Folding to a CONST_VECTOR will use extra space but there might
5222 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5223 only if it allows us to fold away multiple insns. Changing the cost
5224 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5225 because this cost will only be compared against a single insn.
5226 if (code == CONST_VECTOR)
5227 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6); */
5230 /* Use defaults for float operations. Not accurate but good enough. */
5231 if (mode == DFmode)
5233 *total = COSTS_N_INSNS (13);
5234 return true;
5236 if (mode == SFmode)
5238 *total = COSTS_N_INSNS (6);
5239 return true;
5241 switch (code)
5243 case CONST_INT:
5244 if (satisfies_constraint_K (x))
5245 *total = 0;
5246 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5247 *total = COSTS_N_INSNS (1);
5248 else
5249 *total = COSTS_N_INSNS (3);
5250 return true;
5252 case CONST:
5253 *total = COSTS_N_INSNS (3);
5254 return true;
5256 case LABEL_REF:
5257 case SYMBOL_REF:
5258 *total = COSTS_N_INSNS (0);
5259 return true;
5261 case CONST_DOUBLE:
5262 *total = COSTS_N_INSNS (5);
5263 return true;
5265 case FLOAT_EXTEND:
5266 case FLOAT_TRUNCATE:
5267 case FLOAT:
5268 case UNSIGNED_FLOAT:
5269 case FIX:
5270 case UNSIGNED_FIX:
5271 *total = COSTS_N_INSNS (7);
5272 return true;
5274 case PLUS:
5275 if (mode == TImode)
5277 *total = COSTS_N_INSNS (9);
5278 return true;
5280 break;
5282 case MULT:
5283 cost =
5284 GET_CODE (XEXP (x, 0)) ==
5285 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5286 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5288 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5290 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5291 cost = COSTS_N_INSNS (14);
5292 if ((val & 0xffff) == 0)
5293 cost = COSTS_N_INSNS (9);
5294 else if (val > 0 && val < 0x10000)
5295 cost = COSTS_N_INSNS (11);
5298 *total = cost;
5299 return true;
5300 case DIV:
5301 case UDIV:
5302 case MOD:
5303 case UMOD:
5304 *total = COSTS_N_INSNS (20);
5305 return true;
5306 case ROTATE:
5307 case ROTATERT:
5308 case ASHIFT:
5309 case ASHIFTRT:
5310 case LSHIFTRT:
5311 *total = COSTS_N_INSNS (4);
5312 return true;
5313 case UNSPEC:
5314 if (XINT (x, 1) == UNSPEC_CONVERT)
5315 *total = COSTS_N_INSNS (0);
5316 else
5317 *total = COSTS_N_INSNS (4);
5318 return true;
5320 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5321 if (GET_MODE_CLASS (mode) == MODE_INT
5322 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5323 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5324 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5325 *total = cost;
5326 return true;
5329 static machine_mode
5330 spu_unwind_word_mode (void)
5332 return SImode;
5335 /* Decide whether we can make a sibling call to a function. DECL is the
5336 declaration of the function being targeted by the call and EXP is the
5337 CALL_EXPR representing the call. */
5338 static bool
5339 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5341 return decl && !TARGET_LARGE_MEM;
5344 /* We need to correctly update the back chain pointer and the Available
5345 Stack Size (which is in the second slot of the sp register). */
5346 void
5347 spu_allocate_stack (rtx op0, rtx op1)
5349 HOST_WIDE_INT v;
5350 rtx chain = gen_reg_rtx (V4SImode);
5351 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5352 rtx sp = gen_reg_rtx (V4SImode);
5353 rtx splatted = gen_reg_rtx (V4SImode);
5354 rtx pat = gen_reg_rtx (TImode);
5356 /* copy the back chain so we can save it back again. */
5357 emit_move_insn (chain, stack_bot);
5359 op1 = force_reg (SImode, op1);
5361 v = 0x1020300010203ll;
5362 emit_move_insn (pat, immed_double_const (v, v, TImode));
5363 emit_insn (gen_shufb (splatted, op1, op1, pat));
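/* The 0x00010203 pattern repeated across the quadword splats the
   requested size into every slot, so the vector subtract below adjusts
   the stack pointer (slot 0) and the Available Stack Size (slot 1) in
   a single instruction.  */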
5365 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5366 emit_insn (gen_subv4si3 (sp, sp, splatted));
5368 if (flag_stack_check)
5370 rtx avail = gen_reg_rtx(SImode);
5371 rtx result = gen_reg_rtx(SImode);
5372 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5373 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5374 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5377 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5379 emit_move_insn (stack_bot, chain);
5381 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5384 void
5385 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5387 static unsigned char arr[16] =
5388 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5389 rtx temp = gen_reg_rtx (SImode);
5390 rtx temp2 = gen_reg_rtx (SImode);
5391 rtx temp3 = gen_reg_rtx (V4SImode);
5392 rtx temp4 = gen_reg_rtx (V4SImode);
5393 rtx pat = gen_reg_rtx (TImode);
5394 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5396 /* Restore the backchain from the first word, sp from the second. */
5397 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5398 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5400 emit_move_insn (pat, array_to_constant (TImode, arr));
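/* arr replicates the preferred-slot word into all four slots, so the
   two deltas computed below can be applied to the whole $sp vector
   with vector adds.  */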
5402 /* Compute Available Stack Size for sp */
5403 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5404 emit_insn (gen_shufb (temp3, temp, temp, pat));
5406 /* Compute Available Stack Size for back chain */
5407 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5408 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5409 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5411 emit_insn (gen_addv4si3 (sp, sp, temp3));
5412 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5415 static void
5416 spu_init_libfuncs (void)
5418 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5419 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5420 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5421 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5422 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5423 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5424 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5425 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5426 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5427 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5428 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5429 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5431 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5432 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5434 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5435 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5436 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5437 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5438 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5439 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5440 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5441 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5442 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5443 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5444 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5445 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5447 set_optab_libfunc (smul_optab, TImode, "__multi3");
5448 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5449 set_optab_libfunc (smod_optab, TImode, "__modti3");
5450 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5451 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5452 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5455 /* Make a subreg, stripping any existing subreg. We could possibly just
5456 call simplify_subreg, but in this case we know what we want. */
5458 spu_gen_subreg (machine_mode mode, rtx x)
5460 if (GET_CODE (x) == SUBREG)
5461 x = SUBREG_REG (x);
5462 if (GET_MODE (x) == mode)
5463 return x;
5464 return gen_rtx_SUBREG (mode, x, 0);
5467 static bool
5468 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5470 return (TYPE_MODE (type) == BLKmode
5471 && ((type) == 0
5472 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5473 || int_size_in_bytes (type) >
5474 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5477 /* Create the built-in types and functions */
5479 enum spu_function_code
5481 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5482 #include "spu-builtins.def"
5483 #undef DEF_BUILTIN
5484 NUM_SPU_BUILTINS
5487 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5489 struct spu_builtin_description spu_builtins[] = {
5490 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5491 {fcode, icode, name, type, params},
5492 #include "spu-builtins.def"
5493 #undef DEF_BUILTIN
5496 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5498 /* Returns the spu builtin decl for CODE. */
5500 static tree
5501 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5503 if (code >= NUM_SPU_BUILTINS)
5504 return error_mark_node;
5506 return spu_builtin_decls[code];
5510 static void
5511 spu_init_builtins (void)
5513 struct spu_builtin_description *d;
5514 unsigned int i;
5516 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5517 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5518 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5519 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5520 V4SF_type_node = build_vector_type (float_type_node, 4);
5521 V2DF_type_node = build_vector_type (double_type_node, 2);
5523 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5524 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5525 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5526 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5528 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5530 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5531 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5532 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5533 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5534 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5535 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5536 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5537 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5538 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5539 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5540 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5541 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5543 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5544 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5545 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5546 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5547 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5548 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5549 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5552 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5553 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5555 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5557 spu_builtin_types[SPU_BTI_PTR] =
5558 build_pointer_type (build_qualified_type
5559 (void_type_node,
5560 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5562 /* For each builtin we build a new prototype. The tree code will make
5563 sure nodes are shared. */
5564 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5566 tree p;
5567 char name[64]; /* add_builtin_function will make a copy. */
5568 int parm;
5570 if (d->name == 0)
5571 continue;
5573 /* Find last parm. */
5574 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5577 p = void_list_node;
5578 while (parm > 1)
5579 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5581 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5583 sprintf (name, "__builtin_%s", d->name);
5584 spu_builtin_decls[i] =
5585 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5586 if (d->fcode == SPU_MASK_FOR_LOAD)
5587 TREE_READONLY (spu_builtin_decls[i]) = 1;
5589 /* These builtins don't throw. */
5590 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5594 void
5595 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5597 static unsigned char arr[16] =
5598 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5600 rtx temp = gen_reg_rtx (Pmode);
5601 rtx temp2 = gen_reg_rtx (V4SImode);
5602 rtx temp3 = gen_reg_rtx (V4SImode);
5603 rtx pat = gen_reg_rtx (TImode);
5604 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5606 emit_move_insn (pat, array_to_constant (TImode, arr));
5608 /* Restore the sp. */
5609 emit_move_insn (temp, op1);
5610 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5612 /* Compute available stack size for sp. */
5613 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5614 emit_insn (gen_shufb (temp3, temp, temp, pat));
5616 emit_insn (gen_addv4si3 (sp, sp, temp3));
5617 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5621 spu_safe_dma (HOST_WIDE_INT channel)
5623 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5626 void
5627 spu_builtin_splats (rtx ops[])
5629 machine_mode mode = GET_MODE (ops[0]);
5630 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5632 unsigned char arr[16];
5633 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5634 emit_move_insn (ops[0], array_to_constant (mode, arr));
5636 else
5638 rtx reg = gen_reg_rtx (TImode);
5639 rtx shuf;
5640 if (GET_CODE (ops[1]) != REG
5641 && GET_CODE (ops[1]) != SUBREG)
5642 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
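/* The shuffle constants below replicate the element sitting in the
   preferred slot of ops[1] into every slot of the result; both shuffle
   operands are ops[1], so selectors 0x0n and 0x1n pick the same bytes.  */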
5643 switch (mode)
5645 case E_V2DImode:
5646 case E_V2DFmode:
5647 shuf =
5648 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5649 TImode);
5650 break;
5651 case E_V4SImode:
5652 case E_V4SFmode:
5653 shuf =
5654 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5655 TImode);
5656 break;
5657 case E_V8HImode:
5658 shuf =
5659 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5660 TImode);
5661 break;
5662 case E_V16QImode:
5663 shuf =
5664 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5665 TImode);
5666 break;
5667 default:
5668 abort ();
5670 emit_move_insn (reg, shuf);
5671 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5675 void
5676 spu_builtin_extract (rtx ops[])
5678 machine_mode mode;
5679 rtx rot, from, tmp;
5681 mode = GET_MODE (ops[1]);
5683 if (GET_CODE (ops[2]) == CONST_INT)
5685 switch (mode)
5687 case E_V16QImode:
5688 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5689 break;
5690 case E_V8HImode:
5691 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5692 break;
5693 case E_V4SFmode:
5694 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5695 break;
5696 case E_V4SImode:
5697 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5698 break;
5699 case E_V2DImode:
5700 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5701 break;
5702 case E_V2DFmode:
5703 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5704 break;
5705 default:
5706 abort ();
5708 return;
5711 from = spu_gen_subreg (TImode, ops[1]);
5712 rot = gen_reg_rtx (TImode);
5713 tmp = gen_reg_rtx (SImode);
5715 switch (mode)
5717 case E_V16QImode:
5718 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5719 break;
5720 case E_V8HImode:
5721 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5722 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5723 break;
5724 case E_V4SFmode:
5725 case E_V4SImode:
5726 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5727 break;
5728 case E_V2DImode:
5729 case E_V2DFmode:
5730 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5731 break;
5732 default:
5733 abort ();
5735 emit_insn (gen_rotqby_ti (rot, from, tmp));
5737 emit_insn (gen_spu_convert (ops[0], rot));
5740 void
5741 spu_builtin_insert (rtx ops[])
5743 machine_mode mode = GET_MODE (ops[0]);
5744 machine_mode imode = GET_MODE_INNER (mode);
5745 rtx mask = gen_reg_rtx (TImode);
5746 rtx offset;
5748 if (GET_CODE (ops[3]) == CONST_INT)
5749 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5750 else
5752 offset = gen_reg_rtx (SImode);
5753 emit_insn (gen_mulsi3
5754 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5756 emit_insn (gen_cpat
5757 (mask, stack_pointer_rtx, offset,
5758 GEN_INT (GET_MODE_SIZE (imode))));
5759 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5762 void
5763 spu_builtin_promote (rtx ops[])
5765 machine_mode mode, imode;
5766 rtx rot, from, offset;
5767 HOST_WIDE_INT pos;
5769 mode = GET_MODE (ops[0]);
5770 imode = GET_MODE_INNER (mode);
5772 from = gen_reg_rtx (TImode);
5773 rot = spu_gen_subreg (TImode, ops[0]);
5775 emit_insn (gen_spu_convert (from, ops[1]));
5777 if (GET_CODE (ops[2]) == CONST_INT)
5779 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5780 if (GET_MODE_SIZE (imode) < 4)
5781 pos += 4 - GET_MODE_SIZE (imode);
5782 offset = GEN_INT (pos & 15);
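/* For example, for a V4SImode element 2 the block above gives
   pos = -8 and offset = 8; rotating left by 8 bytes brings bytes 8-11
   into the preferred slot.  */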
5784 else
5786 offset = gen_reg_rtx (SImode);
5787 switch (mode)
5789 case E_V16QImode:
5790 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5791 break;
5792 case E_V8HImode:
5793 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5794 emit_insn (gen_addsi3 (offset, offset, offset));
5795 break;
5796 case E_V4SFmode:
5797 case E_V4SImode:
5798 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5799 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5800 break;
5801 case E_V2DImode:
5802 case E_V2DFmode:
5803 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5804 break;
5805 default:
5806 abort ();
5809 emit_insn (gen_rotqby_ti (rot, from, offset));
5812 static void
5813 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5815 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5816 rtx shuf = gen_reg_rtx (V4SImode);
5817 rtx insn = gen_reg_rtx (V4SImode);
5818 rtx shufc;
5819 rtx insnc;
5820 rtx mem;
5822 fnaddr = force_reg (SImode, fnaddr);
5823 cxt = force_reg (SImode, cxt);
5825 if (TARGET_LARGE_MEM)
5827 rtx rotl = gen_reg_rtx (V4SImode);
5828 rtx mask = gen_reg_rtx (V4SImode);
5829 rtx bi = gen_reg_rtx (SImode);
5830 static unsigned char const shufa[16] = {
5831 2, 3, 0, 1, 18, 19, 16, 17,
5832 0, 1, 2, 3, 16, 17, 18, 19
5834 static unsigned char const insna[16] = {
5835 0x41, 0, 0, 79,
5836 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5837 0x60, 0x80, 0, 79,
5838 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5841 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5842 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5844 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5845 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5846 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5847 emit_insn (gen_selb (insn, insnc, rotl, mask));
5849 mem = adjust_address (m_tramp, V4SImode, 0);
5850 emit_move_insn (mem, insn);
5852 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5853 mem = adjust_address (m_tramp, Pmode, 16);
5854 emit_move_insn (mem, bi);
5856 else
5858 rtx scxt = gen_reg_rtx (SImode);
5859 rtx sfnaddr = gen_reg_rtx (SImode);
5860 static unsigned char const insna[16] = {
5861 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5862 0x30, 0, 0, 0,
5863 0, 0, 0, 0,
5864 0, 0, 0, 0
5867 shufc = gen_reg_rtx (TImode);
5868 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5870 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5871 fits 18 bits and the last 4 are zeros. This will be true if
5872 the stack pointer is initialized to 0x3fff0 at program start;
5873 otherwise the ila instruction will be garbage. */
5875 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5876 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5877 emit_insn (gen_cpat
5878 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5879 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5880 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5882 mem = adjust_address (m_tramp, V4SImode, 0);
5883 emit_move_insn (mem, insn);
5885 emit_insn (gen_sync ());
5888 static bool
5889 spu_warn_func_return (tree decl)
5891 /* Naked functions are implemented entirely in assembly, including the
5892 return sequence, so suppress warnings about this. */
5893 return !spu_naked_function_p (decl);
5896 void
5897 spu_expand_sign_extend (rtx ops[])
5899 unsigned char arr[16];
5900 rtx pat = gen_reg_rtx (TImode);
5901 rtx sign, c;
5902 int i, last;
5903 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5904 if (GET_MODE (ops[1]) == QImode)
5906 sign = gen_reg_rtx (HImode);
5907 emit_insn (gen_extendqihi2 (sign, ops[1]));
5908 for (i = 0; i < 16; i++)
5909 arr[i] = 0x12;
5910 arr[last] = 0x13;
5912 else
5914 for (i = 0; i < 16; i++)
5915 arr[i] = 0x10;
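/* Selector 0x10 picks byte 0 of the sign operand (a full word of sign
   bits); the switch below overwrites the low selectors with operand-A
   indices (0x00-0x07) so the least significant bytes of the result
   come from the original value itself.  */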
5916 switch (GET_MODE (ops[1]))
5918 case E_HImode:
5919 sign = gen_reg_rtx (SImode);
5920 emit_insn (gen_extendhisi2 (sign, ops[1]));
5921 arr[last] = 0x03;
5922 arr[last - 1] = 0x02;
5923 break;
5924 case E_SImode:
5925 sign = gen_reg_rtx (SImode);
5926 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5927 for (i = 0; i < 4; i++)
5928 arr[last - i] = 3 - i;
5929 break;
5930 case E_DImode:
5931 sign = gen_reg_rtx (SImode);
5932 c = gen_reg_rtx (SImode);
5933 emit_insn (gen_spu_convert (c, ops[1]));
5934 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5935 for (i = 0; i < 8; i++)
5936 arr[last - i] = 7 - i;
5937 break;
5938 default:
5939 abort ();
5942 emit_move_insn (pat, array_to_constant (TImode, arr));
5943 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5946 /* expand vector initialization. If there are any constant parts,
5947 load constant parts first. Then load any non-constant parts. */
5948 void
5949 spu_expand_vector_init (rtx target, rtx vals)
5951 machine_mode mode = GET_MODE (target);
5952 int n_elts = GET_MODE_NUNITS (mode);
5953 int n_var = 0;
5954 bool all_same = true;
5955 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5956 int i;
5958 first = XVECEXP (vals, 0, 0);
5959 for (i = 0; i < n_elts; ++i)
5961 x = XVECEXP (vals, 0, i);
5962 if (!(CONST_INT_P (x)
5963 || GET_CODE (x) == CONST_DOUBLE
5964 || GET_CODE (x) == CONST_FIXED))
5965 ++n_var;
5966 else
5968 if (first_constant == NULL_RTX)
5969 first_constant = x;
5971 if (i > 0 && !rtx_equal_p (x, first))
5972 all_same = false;
5975 /* if all elements are the same, use splats to repeat elements */
5976 if (all_same)
5978 if (!CONSTANT_P (first)
5979 && !register_operand (first, GET_MODE (x)))
5980 first = force_reg (GET_MODE (first), first);
5981 emit_insn (gen_spu_splats (target, first));
5982 return;
5985 /* load constant parts */
5986 if (n_var != n_elts)
5988 if (n_var == 0)
5990 emit_move_insn (target,
5991 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5993 else
5995 rtx constant_parts_rtx = copy_rtx (vals);
5997 gcc_assert (first_constant != NULL_RTX);
5998 /* fill empty slots with the first constant; this increases
5999 our chance of using splats in the recursive call below. */
6000 for (i = 0; i < n_elts; ++i)
6002 x = XVECEXP (constant_parts_rtx, 0, i);
6003 if (!(CONST_INT_P (x)
6004 || GET_CODE (x) == CONST_DOUBLE
6005 || GET_CODE (x) == CONST_FIXED))
6006 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6009 spu_expand_vector_init (target, constant_parts_rtx);
6013 /* load variable parts */
6014 if (n_var != 0)
6016 rtx insert_operands[4];
6018 insert_operands[0] = target;
6019 insert_operands[2] = target;
6020 for (i = 0; i < n_elts; ++i)
6022 x = XVECEXP (vals, 0, i);
6023 if (!(CONST_INT_P (x)
6024 || GET_CODE (x) == CONST_DOUBLE
6025 || GET_CODE (x) == CONST_FIXED))
6027 if (!register_operand (x, GET_MODE (x)))
6028 x = force_reg (GET_MODE (x), x);
6029 insert_operands[1] = x;
6030 insert_operands[3] = GEN_INT (i);
6031 spu_builtin_insert (insert_operands);
6037 /* Return the insn index of the vector compare instruction for the given
6038 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6040 static int
6041 get_vec_cmp_insn (enum rtx_code code,
6042 machine_mode dest_mode,
6043 machine_mode op_mode)
6046 switch (code)
6048 case EQ:
6049 if (dest_mode == V16QImode && op_mode == V16QImode)
6050 return CODE_FOR_ceq_v16qi;
6051 if (dest_mode == V8HImode && op_mode == V8HImode)
6052 return CODE_FOR_ceq_v8hi;
6053 if (dest_mode == V4SImode && op_mode == V4SImode)
6054 return CODE_FOR_ceq_v4si;
6055 if (dest_mode == V4SImode && op_mode == V4SFmode)
6056 return CODE_FOR_ceq_v4sf;
6057 if (dest_mode == V2DImode && op_mode == V2DFmode)
6058 return CODE_FOR_ceq_v2df;
6059 break;
6060 case GT:
6061 if (dest_mode == V16QImode && op_mode == V16QImode)
6062 return CODE_FOR_cgt_v16qi;
6063 if (dest_mode == V8HImode && op_mode == V8HImode)
6064 return CODE_FOR_cgt_v8hi;
6065 if (dest_mode == V4SImode && op_mode == V4SImode)
6066 return CODE_FOR_cgt_v4si;
6067 if (dest_mode == V4SImode && op_mode == V4SFmode)
6068 return CODE_FOR_cgt_v4sf;
6069 if (dest_mode == V2DImode && op_mode == V2DFmode)
6070 return CODE_FOR_cgt_v2df;
6071 break;
6072 case GTU:
6073 if (dest_mode == V16QImode && op_mode == V16QImode)
6074 return CODE_FOR_clgt_v16qi;
6075 if (dest_mode == V8HImode && op_mode == V8HImode)
6076 return CODE_FOR_clgt_v8hi;
6077 if (dest_mode == V4SImode && op_mode == V4SImode)
6078 return CODE_FOR_clgt_v4si;
6079 break;
6080 default:
6081 break;
6083 return -1;
6086 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6087 DMODE is expected destination mode. This is a recursive function. */
6089 static rtx
6090 spu_emit_vector_compare (enum rtx_code rcode,
6091 rtx op0, rtx op1,
6092 machine_mode dmode)
6094 int vec_cmp_insn;
6095 rtx mask;
6096 machine_mode dest_mode;
6097 machine_mode op_mode = GET_MODE (op1);
6099 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6101 /* Single precision floating point vector compare instructions use a
6102 V4SImode destination; double precision ones use a V2DImode destination.
6103 Move the destination to the appropriate mode later. */
6104 if (dmode == V4SFmode)
6105 dest_mode = V4SImode;
6106 else if (dmode == V2DFmode)
6107 dest_mode = V2DImode;
6108 else
6109 dest_mode = dmode;
6111 mask = gen_reg_rtx (dest_mode);
6112 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6114 if (vec_cmp_insn == -1)
6116 bool swap_operands = false;
6117 bool try_again = false;
6118 switch (rcode)
6120 case LT:
6121 rcode = GT;
6122 swap_operands = true;
6123 try_again = true;
6124 break;
6125 case LTU:
6126 rcode = GTU;
6127 swap_operands = true;
6128 try_again = true;
6129 break;
6130 case NE:
6131 case UNEQ:
6132 case UNLE:
6133 case UNLT:
6134 case UNGE:
6135 case UNGT:
6136 case UNORDERED:
6137 /* Treat A != B as ~(A==B). */
6139 enum rtx_code rev_code;
6140 enum insn_code nor_code;
6141 rtx rev_mask;
6143 rev_code = reverse_condition_maybe_unordered (rcode);
6144 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6146 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6147 gcc_assert (nor_code != CODE_FOR_nothing);
6148 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6149 if (dmode != dest_mode)
6151 rtx temp = gen_reg_rtx (dest_mode);
6152 convert_move (temp, mask, 0);
6153 return temp;
6155 return mask;
6157 break;
6158 case GE:
6159 case GEU:
6160 case LE:
6161 case LEU:
6162 /* Try GT/GTU/LT/LTU OR EQ */
6164 rtx c_rtx, eq_rtx;
6165 enum insn_code ior_code;
6166 enum rtx_code new_code;
6168 switch (rcode)
6170 case GE: new_code = GT; break;
6171 case GEU: new_code = GTU; break;
6172 case LE: new_code = LT; break;
6173 case LEU: new_code = LTU; break;
6174 default:
6175 gcc_unreachable ();
6178 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6179 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6181 ior_code = optab_handler (ior_optab, dest_mode);
6182 gcc_assert (ior_code != CODE_FOR_nothing);
6183 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6184 if (dmode != dest_mode)
6186 rtx temp = gen_reg_rtx (dest_mode);
6187 convert_move (temp, mask, 0);
6188 return temp;
6190 return mask;
6192 break;
6193 case LTGT:
6194 /* Try LT OR GT */
6196 rtx lt_rtx, gt_rtx;
6197 enum insn_code ior_code;
6199 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6200 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6202 ior_code = optab_handler (ior_optab, dest_mode);
6203 gcc_assert (ior_code != CODE_FOR_nothing);
6204 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6205 if (dmode != dest_mode)
6207 rtx temp = gen_reg_rtx (dest_mode);
6208 convert_move (temp, mask, 0);
6209 return temp;
6211 return mask;
6213 break;
6214 case ORDERED:
6215 /* Implement as (A==A) & (B==B) */
6217 rtx a_rtx, b_rtx;
6218 enum insn_code and_code;
6220 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6221 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6223 and_code = optab_handler (and_optab, dest_mode);
6224 gcc_assert (and_code != CODE_FOR_nothing);
6225 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6226 if (dmode != dest_mode)
6228 rtx temp = gen_reg_rtx (dest_mode);
6229 convert_move (temp, mask, 0);
6230 return temp;
6232 return mask;
6234 break;
6235 default:
6236 gcc_unreachable ();
6239 /* You only get two chances. */
6240 if (try_again)
6241 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6243 gcc_assert (vec_cmp_insn != -1);
6245 if (swap_operands)
6247 rtx tmp;
6248 tmp = op0;
6249 op0 = op1;
6250 op1 = tmp;
6254 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6255 if (dmode != dest_mode)
6257 rtx temp = gen_reg_rtx (dest_mode);
6258 convert_move (temp, mask, 0);
6259 return temp;
6261 return mask;
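/* Editorial sketch, not part of the original file: the scalar identities
   behind the fall-back cases above, written for doubles.  They only show
   why inverting, OR-ing or AND-ing simpler compares is enough; the code
   above does the same thing lane-wise on all-ones/all-zeros vector masks.  */
#if 0
#include <stdbool.h>

static bool model_ge (double a, double b)      { return (a > b) || (a == b); }
static bool model_ltgt (double a, double b)    { return (a < b) || (a > b); }
static bool model_ordered (double a, double b) { return (a == a) && (b == b); }
/* NE and the UN* codes: compare with the reversed condition and invert.  */
static bool model_ne (double a, double b)      { return !(a == b); }
#endif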
6265 /* Emit vector conditional expression.
6266 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6267 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6269 int
6270 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6271 rtx cond, rtx cc_op0, rtx cc_op1)
6273 machine_mode dest_mode = GET_MODE (dest);
6274 enum rtx_code rcode = GET_CODE (cond);
6275 rtx mask;
6277 /* Get the vector mask for the given relational operations. */
6278 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6280 emit_insn(gen_selb (dest, op2, op1, mask));
6282 return 1;
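/* Editorial sketch, not part of the original file: a scalar analogue of the
   mask-based select done by selb.  The compare above yields an all-ones
   mask in lanes where the condition holds, so a bitwise select produces the
   VEC_COND_EXPR result; the operand order here is illustrative and says
   nothing about the selb insn pattern itself.  */
#if 0
static unsigned int
model_select (unsigned int if_true, unsigned int if_false, unsigned int mask)
{
  /* Bits where MASK is 1 come from IF_TRUE, the rest from IF_FALSE.  */
  return (if_true & mask) | (if_false & ~mask);
}
#endif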
6285 static rtx
6286 spu_force_reg (machine_mode mode, rtx op)
6288 rtx x, r;
6289 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6291 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6292 || GET_MODE (op) == BLKmode)
6293 return force_reg (mode, convert_to_mode (mode, op, 0));
6294 abort ();
6297 r = force_reg (GET_MODE (op), op);
6298 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6300 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6301 if (x)
6302 return x;
6305 x = gen_reg_rtx (mode);
6306 emit_insn (gen_spu_convert (x, r));
6307 return x;
6310 static void
6311 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6313 HOST_WIDE_INT v = 0;
6314 int lsbits;
6315 /* Check the range of immediate operands. */
6316 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6318 int range = p - SPU_BTI_7;
6320 if (!CONSTANT_P (op))
6321 error ("%s expects an integer literal in the range [%d, %d]",
6322 d->name,
6323 spu_builtin_range[range].low, spu_builtin_range[range].high);
6325 if (GET_CODE (op) == CONST
6326 && (GET_CODE (XEXP (op, 0)) == PLUS
6327 || GET_CODE (XEXP (op, 0)) == MINUS))
6329 v = INTVAL (XEXP (XEXP (op, 0), 1));
6330 op = XEXP (XEXP (op, 0), 0);
6332 else if (GET_CODE (op) == CONST_INT)
6333 v = INTVAL (op);
6334 else if (GET_CODE (op) == CONST_VECTOR
6335 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6336 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6338 /* The default for v is 0 which is valid in every range. */
6339 if (v < spu_builtin_range[range].low
6340 || v > spu_builtin_range[range].high)
6341 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6342 d->name,
6343 spu_builtin_range[range].low, spu_builtin_range[range].high,
6344 v);
6346 switch (p)
6348 case SPU_BTI_S10_4:
6349 lsbits = 4;
6350 break;
6351 case SPU_BTI_U16_2:
6352 /* This is only used in lqa and stqa. Even though the insns
6353 encode 16 bits of the address (all but the 2 least
6354 significant), only 14 bits are used because the address is
6355 masked to be 16-byte aligned. */
6356 lsbits = 4;
6357 break;
6358 case SPU_BTI_S16_2:
6359 /* This is used for lqr and stqr. */
6360 lsbits = 2;
6361 break;
6362 default:
6363 lsbits = 0;
6366 if (GET_CODE (op) == LABEL_REF
6367 || (GET_CODE (op) == SYMBOL_REF
6368 && SYMBOL_REF_FUNCTION_P (op))
6369 || (v & ((1 << lsbits) - 1)) != 0)
6370 warning (0, "%d least significant bits of %s are ignored", lsbits,
6371 d->name);
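/* Editorial sketch, not part of the original file: what the immediate check
   above amounts to for one case, SPU_BTI_S10_4 (lsbits == 4).  The range
   bounds below are illustrative assumptions, not the actual
   spu_builtin_range[] contents.  */
#if 0
#include <stdbool.h>

static bool
model_check_s10_4 (long v, bool *warn_dropped_bits)
{
  const long low = -8192, high = 8191;          /* assumed bounds only */
  const int lsbits = 4;                         /* low bits the insn ignores */
  *warn_dropped_bits = (v & ((1L << lsbits) - 1)) != 0;
  return v >= low && v <= high;                 /* otherwise error () */
}
#endif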
6376 static int
6377 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6378 rtx target, rtx ops[])
6380 enum insn_code icode = (enum insn_code) d->icode;
6381 int i = 0, a;
6383 /* Expand the arguments into rtl. */
6385 if (d->parm[0] != SPU_BTI_VOID)
6386 ops[i++] = target;
6388 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6390 tree arg = CALL_EXPR_ARG (exp, a);
6391 if (arg == 0)
6392 abort ();
6393 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6396 gcc_assert (i == insn_data[icode].n_generator_args);
6397 return i;
6400 static rtx
6401 spu_expand_builtin_1 (struct spu_builtin_description *d,
6402 tree exp, rtx target)
6404 rtx pat;
6405 rtx ops[8];
6406 enum insn_code icode = (enum insn_code) d->icode;
6407 machine_mode mode, tmode;
6408 int i, p;
6409 int n_operands;
6410 tree return_type;
6412 /* Set up ops[] with values from arglist. */
6413 n_operands = expand_builtin_args (d, exp, target, ops);
6415 /* Handle the target operand which must be operand 0. */
6416 i = 0;
6417 if (d->parm[0] != SPU_BTI_VOID)
6420 /* We prefer the mode specified for the match_operand; otherwise
6421 use the mode from the builtin function prototype. */
6422 tmode = insn_data[d->icode].operand[0].mode;
6423 if (tmode == VOIDmode)
6424 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6426 /* Try to use target, because not using it can lead to extra copies,
6427 and when all of the registers are in use, extra copies lead
6428 to extra spills. */
6429 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6430 ops[0] = target;
6431 else
6432 target = ops[0] = gen_reg_rtx (tmode);
6434 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6435 abort ();
6437 i++;
6440 if (d->fcode == SPU_MASK_FOR_LOAD)
6442 machine_mode mode = insn_data[icode].operand[1].mode;
6443 tree arg;
6444 rtx addr, op, pat;
6446 /* get addr */
6447 arg = CALL_EXPR_ARG (exp, 0);
6448 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6449 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6450 addr = memory_address (mode, op);
6452 /* negate addr */
6453 op = gen_reg_rtx (GET_MODE (addr));
6454 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6455 op = gen_rtx_MEM (mode, op);
6457 pat = GEN_FCN (icode) (target, op);
6458 if (!pat)
6459 return 0;
6460 emit_insn (pat);
6461 return target;
6464 /* Ignore align_hint, but still expand its args in case they have
6465 side effects. */
6466 if (icode == CODE_FOR_spu_align_hint)
6467 return 0;
6469 /* Handle the rest of the operands. */
6470 for (p = 1; i < n_operands; i++, p++)
6472 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6473 mode = insn_data[d->icode].operand[i].mode;
6474 else
6475 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6477 /* mode can be VOIDmode here for labels */
6479 /* For specific intrinsics with an immediate operand, e.g.,
6480 si_ai(), we sometimes need to convert the scalar argument to a
6481 vector argument by splatting the scalar. */
6482 if (VECTOR_MODE_P (mode)
6483 && (GET_CODE (ops[i]) == CONST_INT
6484 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6485 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6487 if (GET_CODE (ops[i]) == CONST_INT)
6488 ops[i] = spu_const (mode, INTVAL (ops[i]));
6489 else
6491 rtx reg = gen_reg_rtx (mode);
6492 machine_mode imode = GET_MODE_INNER (mode);
6493 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6494 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6495 if (imode != GET_MODE (ops[i]))
6496 ops[i] = convert_to_mode (imode, ops[i],
6497 TYPE_UNSIGNED (spu_builtin_types
6498 [d->parm[i]]));
6499 emit_insn (gen_spu_splats (reg, ops[i]));
6500 ops[i] = reg;
6504 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6506 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6507 ops[i] = spu_force_reg (mode, ops[i]);
6510 switch (n_operands)
6512 case 0:
6513 pat = GEN_FCN (icode) (0);
6514 break;
6515 case 1:
6516 pat = GEN_FCN (icode) (ops[0]);
6517 break;
6518 case 2:
6519 pat = GEN_FCN (icode) (ops[0], ops[1]);
6520 break;
6521 case 3:
6522 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6523 break;
6524 case 4:
6525 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6526 break;
6527 case 5:
6528 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6529 break;
6530 case 6:
6531 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6532 break;
6533 default:
6534 abort ();
6537 if (!pat)
6538 abort ();
6540 if (d->type == B_CALL || d->type == B_BISLED)
6541 emit_call_insn (pat);
6542 else if (d->type == B_JUMP)
6544 emit_jump_insn (pat);
6545 emit_barrier ();
6547 else
6548 emit_insn (pat);
6550 return_type = spu_builtin_types[d->parm[0]];
6551 if (d->parm[0] != SPU_BTI_VOID
6552 && GET_MODE (target) != TYPE_MODE (return_type))
6554 /* target is the return value. It should always have the mode of
6555 the builtin function prototype. */
6556 target = spu_force_reg (TYPE_MODE (return_type), target);
6559 return target;
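/* Editorial sketch, not part of the original file: the scalar-to-vector
   conversion described above, modelled with a plain array standing in for
   a V4SI operand.  */
#if 0
static void
model_splat_si (int scalar, int lanes[4])
{
  /* spu_const / gen_spu_splats replicate the value into every lane.  */
  for (int i = 0; i < 4; i++)
    lanes[i] = scalar;
}
#endif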
6562 rtx
6563 spu_expand_builtin (tree exp,
6564 rtx target,
6565 rtx subtarget ATTRIBUTE_UNUSED,
6566 machine_mode mode ATTRIBUTE_UNUSED,
6567 int ignore ATTRIBUTE_UNUSED)
6569 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6570 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6571 struct spu_builtin_description *d;
6573 if (fcode < NUM_SPU_BUILTINS)
6575 d = &spu_builtins[fcode];
6577 return spu_expand_builtin_1 (d, exp, target);
6579 abort ();
6582 /* Implement targetm.vectorize.builtin_mask_for_load. */
6583 static tree
6584 spu_builtin_mask_for_load (void)
6586 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6589 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6590 static int
6591 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6592 tree vectype,
6593 int misalign ATTRIBUTE_UNUSED)
6595 unsigned elements;
6597 switch (type_of_cost)
6599 case scalar_stmt:
6600 case vector_stmt:
6601 case vector_load:
6602 case vector_store:
6603 case vec_to_scalar:
6604 case scalar_to_vec:
6605 case cond_branch_not_taken:
6606 case vec_perm:
6607 case vec_promote_demote:
6608 return 1;
6610 case scalar_store:
6611 return 10;
6613 case scalar_load:
6614 /* Load + rotate. */
6615 return 2;
6617 case unaligned_load:
6618 return 2;
6620 case cond_branch_taken:
6621 return 6;
6623 case vec_construct:
6624 elements = TYPE_VECTOR_SUBPARTS (vectype);
6625 return elements / 2 + 1;
6627 default:
6628 gcc_unreachable ();
6632 /* Implement targetm.vectorize.init_cost. */
6634 static void *
6635 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6637 unsigned *cost = XNEWVEC (unsigned, 3);
6638 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6639 return cost;
6642 /* Implement targetm.vectorize.add_stmt_cost. */
6644 static unsigned
6645 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6646 struct _stmt_vec_info *stmt_info, int misalign,
6647 enum vect_cost_model_location where)
6649 unsigned *cost = (unsigned *) data;
6650 unsigned retval = 0;
6652 if (flag_vect_cost_model)
6654 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6655 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6657 /* Statements in an inner loop relative to the loop being
6658 vectorized are weighted more heavily. The value here is
6659 arbitrary and could potentially be improved with analysis. */
6660 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6661 count *= 50; /* FIXME. */
6663 retval = (unsigned) (count * stmt_cost);
6664 cost[where] += retval;
6667 return retval;
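/* Editorial sketch, not part of the original file: how the three cost
   buckets above accumulate.  Statement costs would come from
   spu_builtin_vectorization_cost; the x50 factor mirrors the FIXME above.  */
#if 0
static unsigned
model_add_stmt_cost (unsigned cost[3], int count, int stmt_cost,
                     int where /* 0 prologue, 1 body, 2 epilogue */,
                     int in_inner_loop)
{
  if (where == 1 && in_inner_loop)
    count *= 50;                       /* weight inner-loop statements */
  unsigned retval = (unsigned) (count * stmt_cost);
  cost[where] += retval;               /* finish_cost just reads these back */
  return retval;
}
#endif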
6670 /* Implement targetm.vectorize.finish_cost. */
6672 static void
6673 spu_finish_cost (void *data, unsigned *prologue_cost,
6674 unsigned *body_cost, unsigned *epilogue_cost)
6676 unsigned *cost = (unsigned *) data;
6677 *prologue_cost = cost[vect_prologue];
6678 *body_cost = cost[vect_body];
6679 *epilogue_cost = cost[vect_epilogue];
6682 /* Implement targetm.vectorize.destroy_cost_data. */
6684 static void
6685 spu_destroy_cost_data (void *data)
6687 free (data);
6690 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6691 after applying N iterations. This routine does not determine
6692 how many iterations are required to reach the desired alignment. */
6694 static bool
6695 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6697 if (is_packed)
6698 return false;
6700 /* All other types are naturally aligned. */
6701 return true;
6704 /* Return the appropriate mode for a named address pointer. */
6705 static machine_mode
6706 spu_addr_space_pointer_mode (addr_space_t addrspace)
6708 switch (addrspace)
6710 case ADDR_SPACE_GENERIC:
6711 return ptr_mode;
6712 case ADDR_SPACE_EA:
6713 return EAmode;
6714 default:
6715 gcc_unreachable ();
6719 /* Return the appropriate mode for a named address space address. */
6720 static machine_mode
6721 spu_addr_space_address_mode (addr_space_t addrspace)
6723 switch (addrspace)
6725 case ADDR_SPACE_GENERIC:
6726 return Pmode;
6727 case ADDR_SPACE_EA:
6728 return EAmode;
6729 default:
6730 gcc_unreachable ();
6734 /* Determine if one named address space is a subset of another. */
6736 static bool
6737 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6739 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6740 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6742 if (subset == superset)
6743 return true;
6745 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6746 being subsets but instead as disjoint address spaces. */
6747 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6748 return false;
6750 else
6751 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6754 /* Convert from one address space to another. */
6755 static rtx
6756 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6758 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6759 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6761 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6762 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6764 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6766 rtx result, ls;
6768 ls = gen_const_mem (DImode,
6769 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6770 set_mem_align (ls, 128);
6772 result = gen_reg_rtx (Pmode);
6773 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6774 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6775 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6776 ls, const0_rtx, Pmode, 1);
6778 emit_insn (gen_subsi3 (result, op, ls));
6780 return result;
6783 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6785 rtx result, ls;
6787 ls = gen_const_mem (DImode,
6788 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6789 set_mem_align (ls, 128);
6791 result = gen_reg_rtx (EAmode);
6792 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6793 op = force_reg (Pmode, op);
6794 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6795 ls, const0_rtx, EAmode, 1);
6796 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6798 if (EAmode == SImode)
6799 emit_insn (gen_addsi3 (result, op, ls));
6800 else
6801 emit_insn (gen_adddi3 (result, op, ls));
6803 return result;
6806 else
6807 gcc_unreachable ();
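/* Editorial sketch, not part of the original file: the pointer arithmetic
   behind the two conversion directions above, assuming a 64-bit __ea space
   and a 32-bit local-store space.  The zero check mirrors the
   emit_conditional_move against const0_rtx, which keeps NULL mapping to
   NULL in both directions.  */
#if 0
#include <stdint.h>

static uint32_t
model_ea_to_generic (uint64_t ea, uint64_t local_store_base)
{
  uint64_t base = ea != 0 ? local_store_base : 0;
  return (uint32_t) (ea - base);        /* the gen_subsi3 above */
}

static uint64_t
model_generic_to_ea (uint32_t ls, uint64_t local_store_base)
{
  uint64_t base = ls != 0 ? local_store_base : 0;
  return (uint64_t) ls + base;          /* the gen_addsi3 / gen_adddi3 above */
}
#endif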
6811 /* Count the total number of instructions in each pipe and return the
6812 maximum, which is used as the Minimum Iteration Interval (MII)
6813 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1;
6814 -2 means an instruction that can go in either pipe0 or pipe1 (see the sketch after this function). */
6815 static int
6816 spu_sms_res_mii (struct ddg *g)
6818 int i;
6819 unsigned t[4] = {0, 0, 0, 0};
6821 for (i = 0; i < g->num_nodes; i++)
6823 rtx_insn *insn = g->nodes[i].insn;
6824 int p = get_pipe (insn) + 2;
6826 gcc_assert (p >= 0);
6827 gcc_assert (p < 4);
6829 t[p]++;
6830 if (dump_file && INSN_P (insn))
6831 fprintf (dump_file, "i%d %s %d %d\n",
6832 INSN_UID (insn),
6833 insn_data[INSN_CODE(insn)].name,
6834 p, t[p]);
6836 if (dump_file)
6837 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6839 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
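/* Editorial sketch, not part of the original file: the MII bound computed
   above, with a worked example.  t[0] counts either-pipe insns, t[2]
   pipe0-only, t[3] pipe1-only; t[1] is not used by the formula.  */
#if 0
static unsigned
model_res_mii (unsigned t0, unsigned t2, unsigned t3)
{
  /* Two pipes must issue t0 + t2 + t3 insns, so at least the rounded-up
     half of that count is needed, and each dedicated pipe needs at least
     its own count.  E.g. t0 = 3, t2 = 4, t3 = 1 gives max (4, 4, 1) = 4.  */
  unsigned both = (t0 + t2 + t3 + 1) / 2;
  unsigned dedicated = t2 > t3 ? t2 : t3;
  return both > dedicated ? both : dedicated;
}
#endif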
6843 void
6844 spu_init_expanders (void)
6846 if (cfun)
6848 rtx r0, r1;
6849 /* The hard frame pointer register is only 128-bit aligned when
6850 frame_pointer_needed is true. We don't know that until we're
6851 expanding the prologue. */
6852 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6854 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6855 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6856 to be treated as aligned, so generate them here. */
6857 r0 = gen_reg_rtx (SImode);
6858 r1 = gen_reg_rtx (SImode);
6859 mark_reg_pointer (r0, 128);
6860 mark_reg_pointer (r1, 128);
6861 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6862 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6866 static machine_mode
6867 spu_libgcc_cmp_return_mode (void)
6870 /* For the SPU, word mode is TImode, so it is better to use SImode
6871 for compare returns. */
6872 return SImode;
6875 static machine_mode
6876 spu_libgcc_shift_count_mode (void)
6878 /* For the SPU, word mode is TImode, so it is better to use SImode
6879 for shift counts. */
6880 return SImode;
6883 /* Implement targetm.section_type_flags. */
6884 static unsigned int
6885 spu_section_type_flags (tree decl, const char *name, int reloc)
6887 /* .toe needs to have type @nobits. */
6888 if (strcmp (name, ".toe") == 0)
6889 return SECTION_BSS;
6890 /* Don't load _ea into the current address space. */
6891 if (strcmp (name, "._ea") == 0)
6892 return SECTION_WRITE | SECTION_DEBUG;
6893 return default_section_type_flags (decl, name, reloc);
6896 /* Implement targetm.select_section. */
6897 static section *
6898 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6900 /* Variables and constants defined in the __ea address space
6901 go into a special section named "._ea". */
6902 if (TREE_TYPE (decl) != error_mark_node
6903 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6905 /* We might get called with string constants, but get_named_section
6906 doesn't like them as they are not DECLs. Also, we need to set
6907 flags in that case. */
6908 if (!DECL_P (decl))
6909 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6911 return get_named_section (decl, "._ea", reloc);
6914 return default_elf_select_section (decl, reloc, align);
6917 /* Implement targetm.unique_section. */
6918 static void
6919 spu_unique_section (tree decl, int reloc)
6921 /* We don't support unique section names in the __ea address
6922 space for now. */
6923 if (TREE_TYPE (decl) != error_mark_node
6924 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6925 return;
6927 default_unique_section (decl, reloc);
6930 /* Generate a constant or register which contains 2^SCALE. We assume
6931 the result is valid for MODE. Currently, MODE must be V4SFmode and
6932 SCALE must be SImode. */
6933 rtx
6934 spu_gen_exp2 (machine_mode mode, rtx scale)
6936 gcc_assert (mode == V4SFmode);
6937 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6938 if (GET_CODE (scale) != CONST_INT)
6940 /* unsigned int exp = (127 + scale) << 23;
6941 __vector float m = (__vector float) spu_splats (exp); */
6942 rtx reg = force_reg (SImode, scale);
6943 rtx exp = gen_reg_rtx (SImode);
6944 rtx mul = gen_reg_rtx (mode);
6945 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6946 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6947 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6948 return mul;
6950 else
6952 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6953 unsigned char arr[16];
6954 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6955 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6956 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6957 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6958 return array_to_constant (mode, arr);
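/* Editorial sketch, not part of the original file: the bit pattern built by
   the constant case above.  2^SCALE as an IEEE-754 single is the biased
   exponent 127 + SCALE placed in bits 30..23 with a zero mantissa; e.g.
   SCALE = 3 gives exp = 130 = 0x82, word 0x41000000 = 8.0f, i.e. bytes
   {0x41, 0x00, 0x00, 0x00}, matching arr[0] = exp >> 1 and
   arr[1] = (exp << 7) & 0xff in the code above.  */
#if 0
#include <stdint.h>
#include <string.h>

static float
model_exp2f (int scale)
{
  uint32_t bits = (uint32_t) (127 + scale) << 23;   /* sign/mantissa zero */
  float f;
  memcpy (&f, &bits, sizeof f);                     /* assumes IEEE-754 host */
  return f;
}
#endif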
6962 /* After reload, just change the convert into a move instruction
6963 or a dead instruction. */
6964 void
6965 spu_split_convert (rtx ops[])
6967 if (REGNO (ops[0]) == REGNO (ops[1]))
6968 emit_note (NOTE_INSN_DELETED);
6969 else
6971 /* Use TImode always as this might help hard reg copyprop. */
6972 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6973 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6974 emit_insn (gen_move_insn (op0, op1));
6978 void
6979 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6981 fprintf (file, "# profile\n");
6982 fprintf (file, "brsl $75, _mcount\n");
6985 /* Implement targetm.ref_may_alias_errno. */
6986 static bool
6987 spu_ref_may_alias_errno (ao_ref *ref)
6989 tree base = ao_ref_base (ref);
6991 /* With SPU newlib, errno is defined as something like
6992 _impure_data._errno.
6993 The default implementation of this target macro does not
6994 recognize such expressions, so special-case them here. */
6996 if (TREE_CODE (base) == VAR_DECL
6997 && !TREE_STATIC (base)
6998 && DECL_EXTERNAL (base)
6999 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7000 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7001 "_impure_data") == 0
7002 /* _errno is the first member of _impure_data. */
7003 && ref->offset == 0)
7004 return true;
7006 return default_ref_may_alias_errno (ref);
7009 /* Output thunk to FILE that implements a C++ virtual function call (with
7010 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7011 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7012 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7013 relative to the resulting this pointer. */
7015 static void
7016 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7017 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7018 tree function)
7020 rtx op[8];
7022 /* Make sure unwind info is emitted for the thunk if needed. */
7023 final_start_function (emit_barrier (), file, 1);
7025 /* Operand 0 is the target function. */
7026 op[0] = XEXP (DECL_RTL (function), 0);
7028 /* Operand 1 is the 'this' pointer. */
7029 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7030 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7031 else
7032 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7034 /* Operands 2/3 are the low/high halfwords of delta. */
7035 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7036 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7038 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7039 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7040 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7042 /* Operands 6/7 are temporary registers. */
7043 op[6] = gen_rtx_REG (Pmode, 79);
7044 op[7] = gen_rtx_REG (Pmode, 78);
7046 /* Add DELTA to this pointer. */
7047 if (delta)
7049 if (delta >= -0x200 && delta < 0x200)
7050 output_asm_insn ("ai\t%1,%1,%2", op);
7051 else if (delta >= -0x8000 && delta < 0x8000)
7053 output_asm_insn ("il\t%6,%2", op);
7054 output_asm_insn ("a\t%1,%1,%6", op);
7056 else
7058 output_asm_insn ("ilhu\t%6,%3", op);
7059 output_asm_insn ("iohl\t%6,%2", op);
7060 output_asm_insn ("a\t%1,%1,%6", op);
7064 /* Perform vcall adjustment. */
7065 if (vcall_offset)
7067 output_asm_insn ("lqd\t%7,0(%1)", op);
7068 output_asm_insn ("rotqby\t%7,%7,%1", op);
7070 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7071 output_asm_insn ("ai\t%7,%7,%4", op);
7072 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7074 output_asm_insn ("il\t%6,%4", op);
7075 output_asm_insn ("a\t%7,%7,%6", op);
7077 else
7079 output_asm_insn ("ilhu\t%6,%5", op);
7080 output_asm_insn ("iohl\t%6,%4", op);
7081 output_asm_insn ("a\t%7,%7,%6", op);
7084 output_asm_insn ("lqd\t%6,0(%7)", op);
7085 output_asm_insn ("rotqby\t%6,%6,%7", op);
7086 output_asm_insn ("a\t%1,%1,%6", op);
7089 /* Jump to target. */
7090 output_asm_insn ("br\t%0", op);
7092 final_end_function ();
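/* Editorial sketch, not part of the original file: the adjustment the
   emitted thunk performs, in plain C.  The real code splits DELTA and
   VCALL_OFFSET into halfword immediates for ai/il/ilhu/iohl, but the
   pointer arithmetic is the same.  */
#if 0
#include <stddef.h>

static void *
model_thunk_adjust (void *this_ptr, ptrdiff_t delta, ptrdiff_t vcall_offset)
{
  char *p = (char *) this_ptr + delta;       /* the ai / il+a / ilhu+iohl+a part */
  if (vcall_offset)
    {
      char *vtable = *(char **) p;           /* lqd/rotqby from offset 0 */
      p += *(ptrdiff_t *) (vtable + vcall_offset);   /* final "a %1,%1,%6" */
    }
  return p;                                  /* then "br" to the target */
}
#endif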
7095 /* Canonicalize a comparison from one we don't have to one we do have. */
7096 static void
7097 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7098 bool op0_preserve_value)
7100 if (!op0_preserve_value
7101 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7103 rtx tem = *op0;
7104 *op0 = *op1;
7105 *op1 = tem;
7106 *code = (int)swap_condition ((enum rtx_code)*code);
7110 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7111 to perform. MEM is the memory on which to operate. VAL is the second
7112 operand of the binary operator. BEFORE and AFTER are optional locations to
7113 return the value of MEM either before or after the operation; see the sketch below. */
7114 void
7115 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7116 rtx orig_before, rtx orig_after)
7118 machine_mode mode = GET_MODE (mem);
7119 rtx before = orig_before, after = orig_after;
7121 if (before == NULL_RTX)
7122 before = gen_reg_rtx (mode);
7124 emit_move_insn (before, mem);
7126 if (code == MULT) /* NAND operation */
7128 rtx x = expand_simple_binop (mode, AND, before, val,
7129 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7130 after = expand_simple_unop (mode, NOT, x, after, 1);
7132 else
7134 after = expand_simple_binop (mode, code, before, val,
7135 after, 1, OPTAB_LIB_WIDEN);
7138 emit_move_insn (mem, after);
7140 if (orig_after && after != orig_after)
7141 emit_move_insn (orig_after, after);
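/* Editorial sketch, not part of the original file: the value flow of the
   expansion above for one 32-bit location, with the atomicity machinery
   left out.  CODE == MULT is the NAND case, exactly as in the code; '+'
   stands in for any other CODE.  */
#if 0
#include <stdint.h>

static void
model_fetch_and_op (uint32_t *mem, uint32_t val, int is_nand,
                    uint32_t *before_out, uint32_t *after_out)
{
  uint32_t before = *mem;                        /* emit_move_insn (before, mem) */
  uint32_t after = is_nand ? ~(before & val) : before + val;
  *mem = after;                                  /* emit_move_insn (mem, after) */
  if (before_out)
    *before_out = before;
  if (after_out)
    *after_out = after;
}
#endif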
7145 /* Table of machine attributes. */
7146 static const struct attribute_spec spu_attribute_table[] =
7148 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7149 affects_type_identity } */
7150 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7151 false },
7152 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7153 false },
7154 { NULL, 0, 0, false, false, false, NULL, false }
7157 /* TARGET overrides. */
7159 #undef TARGET_LRA_P
7160 #define TARGET_LRA_P hook_bool_void_false
7162 #undef TARGET_ADDR_SPACE_POINTER_MODE
7163 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7165 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7166 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7168 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7169 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7170 spu_addr_space_legitimate_address_p
7172 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7173 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7175 #undef TARGET_ADDR_SPACE_SUBSET_P
7176 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7178 #undef TARGET_ADDR_SPACE_CONVERT
7179 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7181 #undef TARGET_INIT_BUILTINS
7182 #define TARGET_INIT_BUILTINS spu_init_builtins
7183 #undef TARGET_BUILTIN_DECL
7184 #define TARGET_BUILTIN_DECL spu_builtin_decl
7186 #undef TARGET_EXPAND_BUILTIN
7187 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7189 #undef TARGET_UNWIND_WORD_MODE
7190 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7192 #undef TARGET_LEGITIMIZE_ADDRESS
7193 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7195 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7196 and .quad for the debugger. These can be removed once the assembler is
7197 known to be fixed. */
7198 #undef TARGET_ASM_UNALIGNED_SI_OP
7199 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7201 #undef TARGET_ASM_ALIGNED_DI_OP
7202 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7204 /* The .8byte directive doesn't seem to work well for a 32 bit
7205 architecture. */
7206 #undef TARGET_ASM_UNALIGNED_DI_OP
7207 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7209 #undef TARGET_RTX_COSTS
7210 #define TARGET_RTX_COSTS spu_rtx_costs
7212 #undef TARGET_ADDRESS_COST
7213 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7215 #undef TARGET_SCHED_ISSUE_RATE
7216 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7218 #undef TARGET_SCHED_INIT_GLOBAL
7219 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7221 #undef TARGET_SCHED_INIT
7222 #define TARGET_SCHED_INIT spu_sched_init
7224 #undef TARGET_SCHED_VARIABLE_ISSUE
7225 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7227 #undef TARGET_SCHED_REORDER
7228 #define TARGET_SCHED_REORDER spu_sched_reorder
7230 #undef TARGET_SCHED_REORDER2
7231 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7233 #undef TARGET_SCHED_ADJUST_COST
7234 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7236 #undef TARGET_ATTRIBUTE_TABLE
7237 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7239 #undef TARGET_ASM_INTEGER
7240 #define TARGET_ASM_INTEGER spu_assemble_integer
7242 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7243 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7245 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7246 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7248 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7249 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7251 #undef TARGET_ASM_GLOBALIZE_LABEL
7252 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7254 #undef TARGET_PASS_BY_REFERENCE
7255 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7257 #undef TARGET_FUNCTION_ARG
7258 #define TARGET_FUNCTION_ARG spu_function_arg
7260 #undef TARGET_FUNCTION_ARG_ADVANCE
7261 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7263 #undef TARGET_MUST_PASS_IN_STACK
7264 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7266 #undef TARGET_BUILD_BUILTIN_VA_LIST
7267 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7269 #undef TARGET_EXPAND_BUILTIN_VA_START
7270 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7272 #undef TARGET_SETUP_INCOMING_VARARGS
7273 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7275 #undef TARGET_MACHINE_DEPENDENT_REORG
7276 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7278 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7279 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7281 #undef TARGET_INIT_LIBFUNCS
7282 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7284 #undef TARGET_RETURN_IN_MEMORY
7285 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7287 #undef TARGET_ENCODE_SECTION_INFO
7288 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7290 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7291 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7293 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7294 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7296 #undef TARGET_VECTORIZE_INIT_COST
7297 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7299 #undef TARGET_VECTORIZE_ADD_STMT_COST
7300 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7302 #undef TARGET_VECTORIZE_FINISH_COST
7303 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7305 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7306 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7308 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7309 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7311 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7312 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7314 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7315 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7317 #undef TARGET_SCHED_SMS_RES_MII
7318 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7320 #undef TARGET_SECTION_TYPE_FLAGS
7321 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7323 #undef TARGET_ASM_SELECT_SECTION
7324 #define TARGET_ASM_SELECT_SECTION spu_select_section
7326 #undef TARGET_ASM_UNIQUE_SECTION
7327 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7329 #undef TARGET_LEGITIMATE_ADDRESS_P
7330 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7332 #undef TARGET_LEGITIMATE_CONSTANT_P
7333 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7335 #undef TARGET_TRAMPOLINE_INIT
7336 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7338 #undef TARGET_WARN_FUNC_RETURN
7339 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7341 #undef TARGET_OPTION_OVERRIDE
7342 #define TARGET_OPTION_OVERRIDE spu_option_override
7344 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7345 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7347 #undef TARGET_REF_MAY_ALIAS_ERRNO
7348 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7350 #undef TARGET_ASM_OUTPUT_MI_THUNK
7351 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7352 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7353 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7355 /* Variable tracking should be run after all optimizations which
7356 change the order of insns. It also needs a valid CFG. */
7357 #undef TARGET_DELAY_VARTRACK
7358 #define TARGET_DELAY_VARTRACK true
7360 #undef TARGET_CANONICALIZE_COMPARISON
7361 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7363 #undef TARGET_CAN_USE_DOLOOP_P
7364 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7366 struct gcc_target targetm = TARGET_INITIALIZER;
7368 #include "gt-spu.h"