1 /* Copyright (C) 2006-2018 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #define IN_TARGET_CODE 1
19 #include "config.h"
20 #include "system.h"
21 #include "coretypes.h"
22 #include "backend.h"
23 #include "target.h"
24 #include "rtl.h"
25 #include "tree.h"
26 #include "gimple.h"
27 #include "cfghooks.h"
28 #include "cfgloop.h"
29 #include "df.h"
30 #include "memmodel.h"
31 #include "tm_p.h"
32 #include "stringpool.h"
33 #include "attribs.h"
34 #include "expmed.h"
35 #include "optabs.h"
36 #include "regs.h"
37 #include "emit-rtl.h"
38 #include "recog.h"
39 #include "diagnostic-core.h"
40 #include "insn-attr.h"
41 #include "alias.h"
42 #include "fold-const.h"
43 #include "stor-layout.h"
44 #include "calls.h"
45 #include "varasm.h"
46 #include "explow.h"
47 #include "expr.h"
48 #include "output.h"
49 #include "cfgrtl.h"
50 #include "cfgbuild.h"
51 #include "langhooks.h"
52 #include "reload.h"
53 #include "sched-int.h"
54 #include "params.h"
55 #include "gimplify.h"
56 #include "tm-constrs.h"
57 #include "ddg.h"
58 #include "dumpfile.h"
59 #include "builtins.h"
60 #include "rtl-iter.h"
61 #include "flags.h"
62 #include "toplev.h"
64 /* This file should be included last. */
65 #include "target-def.h"
67 /* Builtin types, data and prototypes. */
69 enum spu_builtin_type_index
{
71 SPU_BTI_END_OF_PARAMS,
73 /* We create new type nodes for these. */
74 SPU_BTI_V16QI,
75 SPU_BTI_V8HI,
76 SPU_BTI_V4SI,
77 SPU_BTI_V2DI,
78 SPU_BTI_V4SF,
79 SPU_BTI_V2DF,
80 SPU_BTI_UV16QI,
81 SPU_BTI_UV8HI,
82 SPU_BTI_UV4SI,
83 SPU_BTI_UV2DI,
85 /* A 16-byte type. (Implemented with V16QI_type_node) */
86 SPU_BTI_QUADWORD,
88 /* These all correspond to intSI_type_node */
89 SPU_BTI_7,
90 SPU_BTI_S7,
91 SPU_BTI_U7,
92 SPU_BTI_S10,
93 SPU_BTI_S10_4,
94 SPU_BTI_U14,
95 SPU_BTI_16,
96 SPU_BTI_S16,
97 SPU_BTI_S16_2,
98 SPU_BTI_U16,
99 SPU_BTI_U16_2,
100 SPU_BTI_U18,
102 /* These correspond to the standard types */
103 SPU_BTI_INTQI,
104 SPU_BTI_INTHI,
105 SPU_BTI_INTSI,
106 SPU_BTI_INTDI,
108 SPU_BTI_UINTQI,
109 SPU_BTI_UINTHI,
110 SPU_BTI_UINTSI,
111 SPU_BTI_UINTDI,
113 SPU_BTI_FLOAT,
114 SPU_BTI_DOUBLE,
116 SPU_BTI_VOID,
117 SPU_BTI_PTR,
119 SPU_BTI_MAX
};
122 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
123 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
124 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
125 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
126 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
127 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
128 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
129 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
130 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
131 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
133 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
135 struct spu_builtin_range
{
137 int low, high;
};
140 static struct spu_builtin_range spu_builtin_range[] = {
141 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
142 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
143 {0ll, 0x7fll}, /* SPU_BTI_U7 */
144 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
145 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
146 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
147 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
148 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
149 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
150 {0ll, 0xffffll}, /* SPU_BTI_U16 */
151 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
152 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
};
156 /* Target specific attribute specifications. */
157 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
159 /* Prototypes and external defs. */
160 static int get_pipe (rtx_insn *insn);
161 static int spu_naked_function_p (tree func);
162 static int mem_is_padded_component_ref (rtx x);
163 static void fix_range (const char *);
164 static rtx spu_expand_load (rtx, rtx, rtx, int);
166 /* Which instruction set architecture to use. */
167 int spu_arch;
168 /* Which cpu are we tuning for. */
169 int spu_tune;
171 /* The hardware requires 8 insns between a hint and the branch it
172 affects. This variable describes how many rtl instructions the
173 compiler needs to see before inserting a hint, and then the compiler
174 will insert enough nops to make it at least 8 insns. The default is
175 for the compiler to allow up to 2 nops to be emitted. The nops are
176 inserted in pairs, so we round down. */
177 int spu_hint_dist = (8*4) - (2*4);
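/* A minimal sketch of the arithmetic behind spu_hint_dist, assuming 4-byte
   instructions and that the nop budget comes from the -mmax-nops=N option
   (spu_max_nops), as spu_option_override does below.  Illustrative only,
   not part of the build.  */
#if 0
static int
example_hint_dist (int max_nops)
{
  /* 8 instructions of 4 bytes, minus the bytes the allowed nops cover;
     never negative.  max_nops == 2 gives the default of 24 bytes.  */
  int dist = 8 * 4 - max_nops * 4;
  return dist < 0 ? 0 : dist;
}
#endif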
179 enum spu_immediate {
180 SPU_NONE,
181 SPU_IL,
182 SPU_ILA,
183 SPU_ILH,
184 SPU_ILHU,
185 SPU_ORI,
186 SPU_ORHI,
187 SPU_ORBI,
188 SPU_IOHL
};
190 enum immediate_class
{
192 IC_POOL, /* constant pool */
193 IC_IL1, /* one il* instruction */
194 IC_IL2, /* both ilhu and iohl instructions */
195 IC_IL1s, /* one il* instruction */
196 IC_IL2s, /* both ilhu and iohl instructions */
197 IC_FSMBI, /* the fsmbi instruction */
198 IC_CPAT, /* one of the c*d instructions */
199 IC_FSMBI2 /* fsmbi plus 1 other instruction */
};
202 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
203 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
204 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
205 static enum immediate_class classify_immediate (rtx op,
206 machine_mode mode);
208 /* Pointer mode for __ea references. */
209 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
212 /* Define the structure for the machine field in struct function. */
213 struct GTY(()) machine_function
{
215 /* Register to use for PIC accesses. */
216 rtx pic_reg;
};
219 /* How to allocate a 'struct machine_function'. */
220 static struct machine_function *
221 spu_init_machine_status (void)
223 return ggc_cleared_alloc<machine_function> ();
226 /* Implement TARGET_OPTION_OVERRIDE. */
227 static void
228 spu_option_override (void)
230 /* Set up function hooks. */
231 init_machine_status = spu_init_machine_status;
233 /* Small loops will be unpeeled at -O3. For SPU it is more important
234 to keep code small by default. */
235 if (!flag_unroll_loops && !flag_peel_loops)
236 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
237 global_options.x_param_values,
238 global_options_set.x_param_values);
240 flag_omit_frame_pointer = 1;
242 /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
243 parse_alignment_opts ();
244 if (align_functions_value < 8)
245 str_align_functions = "8";
247 spu_hint_dist = 8*4 - spu_max_nops*4;
248 if (spu_hint_dist < 0)
249 spu_hint_dist = 0;
251 if (spu_fixed_range_string)
252 fix_range (spu_fixed_range_string);
254 /* Determine processor architectural level. */
255 if (spu_arch_string)
257 if (strcmp (&spu_arch_string[0], "cell") == 0)
258 spu_arch = PROCESSOR_CELL;
259 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
260 spu_arch = PROCESSOR_CELLEDP;
261 else
262 error ("bad value (%s) for -march= switch", spu_arch_string);
265 /* Determine processor to tune for. */
266 if (spu_tune_string)
268 if (strcmp (&spu_tune_string[0], "cell") == 0)
269 spu_tune = PROCESSOR_CELL;
270 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
271 spu_tune = PROCESSOR_CELLEDP;
272 else
273 error ("bad value (%s) for -mtune= switch", spu_tune_string);
276 /* Change defaults according to the processor architecture. */
277 if (spu_arch == PROCESSOR_CELLEDP)
279 /* If no command line option has been otherwise specified, change
280 the default to -mno-safe-hints on celledp -- only the original
281 Cell/B.E. processors require this workaround. */
282 if (!(target_flags_explicit & MASK_SAFE_HINTS))
283 target_flags &= ~MASK_SAFE_HINTS;
286 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
289 /* Implement TARGET_HARD_REGNO_NREGS. */
291 static unsigned int
292 spu_hard_regno_nregs (unsigned int, machine_mode mode)
294 return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
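/* A plain-C sketch of the rounding used above, assuming MAX_FIXED_MODE_SIZE
   is 128 on this target (SPU registers are 128 bits wide), so every scalar
   and vector mode the port supports needs exactly one register.  Not part
   of the build.  */
#if 0
static unsigned int
example_hard_regno_nregs (unsigned int mode_bits)
{
  const unsigned int reg_bits = 128;            /* assumed register width */
  return (mode_bits + reg_bits - 1) / reg_bits; /* CEIL (mode_bits, 128) */
}
#endif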
297 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
298 struct attribute_spec.handler. */
300 /* True if MODE is valid for the target. By "valid", we mean able to
301 be manipulated in non-trivial ways. In particular, this means all
302 the arithmetic is supported. */
303 static bool
304 spu_scalar_mode_supported_p (scalar_mode mode)
306 switch (mode)
308 case E_QImode:
309 case E_HImode:
310 case E_SImode:
311 case E_SFmode:
312 case E_DImode:
313 case E_TImode:
314 case E_DFmode:
315 return true;
317 default:
318 return false;
322 /* Similarly for vector modes. "Supported" here is less strict. At
323 least some operations are supported; need to check optabs or builtins
324 for further details. */
325 static bool
326 spu_vector_mode_supported_p (machine_mode mode)
328 switch (mode)
330 case E_V16QImode:
331 case E_V8HImode:
332 case E_V4SImode:
333 case E_V2DImode:
334 case E_V4SFmode:
335 case E_V2DFmode:
336 return true;
338 default:
339 return false;
343 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
344 least significant bytes of the outer mode. This function returns
345 TRUE for the SUBREG's where this is correct. */
int
347 valid_subreg (rtx op)
349 machine_mode om = GET_MODE (op);
350 machine_mode im = GET_MODE (SUBREG_REG (op));
351 return om != VOIDmode && im != VOIDmode
352 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
353 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
354 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
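/* The size rule valid_subreg checks, restated on byte counts alone (the
   real test also requires both modes to be non-VOIDmode).  A sketch, not
   part of the build.  */
#if 0
static int
example_valid_subreg_sizes (int inner_bytes, int outer_bytes)
{
  return inner_bytes == outer_bytes
	 || (inner_bytes <= 4 && outer_bytes <= 4)
	 || (inner_bytes >= 16 && outer_bytes >= 16);
}
#endif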
357 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
358 and adjust the start offset. */
359 static rtx
360 adjust_operand (rtx op, HOST_WIDE_INT * start)
362 machine_mode mode;
363 int op_size;
364 /* Strip any paradoxical SUBREG. */
365 if (GET_CODE (op) == SUBREG
366 && (GET_MODE_BITSIZE (GET_MODE (op))
367 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
369 if (start)
370 *start -=
371 GET_MODE_BITSIZE (GET_MODE (op)) -
372 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
373 op = SUBREG_REG (op);
375 /* If it is smaller than SI, assure a SUBREG */
376 op_size = GET_MODE_BITSIZE (GET_MODE (op));
377 if (op_size < 32)
379 if (start)
380 *start += 32 - op_size;
381 op_size = 32;
383 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
384 mode = int_mode_for_size (op_size, 0).require ();
385 if (mode != GET_MODE (op))
386 op = gen_rtx_SUBREG (mode, op, 0);
387 return op;
390 void
391 spu_expand_extv (rtx ops[], int unsignedp)
393 rtx dst = ops[0], src = ops[1];
394 HOST_WIDE_INT width = INTVAL (ops[2]);
395 HOST_WIDE_INT start = INTVAL (ops[3]);
396 HOST_WIDE_INT align_mask;
397 rtx s0, s1, mask, r0;
399 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
401 if (MEM_P (src))
403 /* First, determine if we need 1 TImode load or 2. We need only 1
404 if the bits being extracted do not cross the alignment boundary
405 as determined by the MEM and its address. */
407 align_mask = -MEM_ALIGN (src);
408 if ((start & align_mask) == ((start + width - 1) & align_mask))
410 /* Alignment is sufficient for 1 load. */
411 s0 = gen_reg_rtx (TImode);
412 r0 = spu_expand_load (s0, 0, src, start / 8);
413 start &= 7;
414 if (r0)
415 emit_insn (gen_rotqby_ti (s0, s0, r0));
417 else
419 /* Need 2 loads. */
420 s0 = gen_reg_rtx (TImode);
421 s1 = gen_reg_rtx (TImode);
422 r0 = spu_expand_load (s0, s1, src, start / 8);
423 start &= 7;
425 gcc_assert (start + width <= 128);
426 if (r0)
428 rtx r1 = gen_reg_rtx (SImode);
429 mask = gen_reg_rtx (TImode);
430 emit_move_insn (mask, GEN_INT (-1));
431 emit_insn (gen_rotqby_ti (s0, s0, r0));
432 emit_insn (gen_rotqby_ti (s1, s1, r0));
433 if (GET_CODE (r0) == CONST_INT)
434 r1 = GEN_INT (INTVAL (r0) & 15);
435 else
436 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
437 emit_insn (gen_shlqby_ti (mask, mask, r1));
438 emit_insn (gen_selb (s0, s1, s0, mask));
443 else if (GET_CODE (src) == SUBREG)
445 rtx r = SUBREG_REG (src);
446 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
447 s0 = gen_reg_rtx (TImode);
448 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
449 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
450 else
451 emit_move_insn (s0, src);
453 else
455 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
456 s0 = gen_reg_rtx (TImode);
457 emit_move_insn (s0, src);
460 /* Now s0 is TImode and contains the bits to extract at start. */
462 if (start)
463 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
465 if (128 - width)
466 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
468 emit_move_insn (dst, s0);
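/* A 32-bit analogue of the TImode extraction above: rotate the field up to
   the most-significant end, then shift it back down by (size - width).  The
   real code does the same on a 128-bit register, using an arithmetic shift
   for signed extractions.  Sketch only, not part of the build.  */
#if 0
static unsigned int
example_extract_bits32 (unsigned int word, int start, int width)
{
  unsigned int r = start ? (word << start) | (word >> (32 - start)) : word;
  return width < 32 ? r >> (32 - width) : r;
}
#endif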
471 void
472 spu_expand_insv (rtx ops[])
474 HOST_WIDE_INT width = INTVAL (ops[1]);
475 HOST_WIDE_INT start = INTVAL (ops[2]);
476 unsigned HOST_WIDE_INT maskbits;
477 machine_mode dst_mode;
478 rtx dst = ops[0], src = ops[3];
479 int dst_size;
480 rtx mask;
481 rtx shift_reg;
482 int shift;
485 if (GET_CODE (ops[0]) == MEM)
486 dst = gen_reg_rtx (TImode);
487 else
488 dst = adjust_operand (dst, &start);
489 dst_mode = GET_MODE (dst);
490 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
492 if (CONSTANT_P (src))
494 machine_mode m =
495 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
496 src = force_reg (m, convert_to_mode (m, src, 0));
498 src = adjust_operand (src, 0);
500 mask = gen_reg_rtx (dst_mode);
501 shift_reg = gen_reg_rtx (dst_mode);
502 shift = dst_size - start - width;
504 /* It's not safe to use subreg here because the compiler assumes
505 that the SUBREG_REG is right justified in the SUBREG. */
506 convert_move (shift_reg, src, 1);
508 if (shift > 0)
510 switch (dst_mode)
512 case E_SImode:
513 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
514 break;
515 case E_DImode:
516 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
517 break;
518 case E_TImode:
519 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
520 break;
521 default:
522 abort ();
525 else if (shift < 0)
526 abort ();
528 switch (dst_size)
530 case 32:
531 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
532 if (start)
533 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
534 emit_move_insn (mask, GEN_INT (maskbits));
535 break;
536 case 64:
537 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
538 if (start)
539 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
540 emit_move_insn (mask, GEN_INT (maskbits));
541 break;
542 case 128:
544 unsigned char arr[16];
545 int i = start / 8;
546 memset (arr, 0, sizeof (arr));
547 arr[i] = 0xff >> (start & 7);
548 for (i++; i <= (start + width - 1) / 8; i++)
549 arr[i] = 0xff;
550 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
551 emit_move_insn (mask, array_to_constant (TImode, arr));
553 break;
554 default:
555 abort ();
557 if (GET_CODE (ops[0]) == MEM)
559 rtx low = gen_reg_rtx (SImode);
560 rtx rotl = gen_reg_rtx (SImode);
561 rtx mask0 = gen_reg_rtx (TImode);
562 rtx addr;
563 rtx addr0;
564 rtx addr1;
565 rtx mem;
567 addr = force_reg (Pmode, XEXP (ops[0], 0));
568 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
569 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
570 emit_insn (gen_negsi2 (rotl, low));
571 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
572 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
573 mem = change_address (ops[0], TImode, addr0);
574 set_mem_alias_set (mem, 0);
575 emit_move_insn (dst, mem);
576 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
577 if (start + width > MEM_ALIGN (ops[0]))
579 rtx shl = gen_reg_rtx (SImode);
580 rtx mask1 = gen_reg_rtx (TImode);
581 rtx dst1 = gen_reg_rtx (TImode);
582 rtx mem1;
583 addr1 = plus_constant (Pmode, addr, 16);
584 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
585 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
586 emit_insn (gen_shlqby_ti (mask1, mask, shl));
587 mem1 = change_address (ops[0], TImode, addr1);
588 set_mem_alias_set (mem1, 0);
589 emit_move_insn (dst1, mem1);
590 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
591 emit_move_insn (mem1, dst1);
593 emit_move_insn (mem, dst);
595 else
596 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
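/* The 32-bit mask construction used above, worked through in plain C: shift
   ~0 so every bit from the low end of the field upward is set, then add the
   bit just above the field so the carry clears everything above it.  For
   start = 4, width = 8 this yields 0x0ff00000.  Sketch only, not part of
   the build.  */
#if 0
static unsigned int
example_insv_mask32 (int start, int width)   /* start counted from the MSB */
{
  unsigned int mask = ~0u << (32 - width - start);
  if (start)
    mask += 1u << (32 - start);
  return mask;
}
#endif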
int
601 spu_expand_block_move (rtx ops[])
603 HOST_WIDE_INT bytes, align, offset;
604 rtx src, dst, sreg, dreg, target;
605 int i;
606 if (GET_CODE (ops[2]) != CONST_INT
607 || GET_CODE (ops[3]) != CONST_INT
608 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
609 return 0;
611 bytes = INTVAL (ops[2]);
612 align = INTVAL (ops[3]);
614 if (bytes <= 0)
615 return 1;
617 dst = ops[0];
618 src = ops[1];
620 if (align == 16)
622 for (offset = 0; offset + 16 <= bytes; offset += 16)
624 dst = adjust_address (ops[0], V16QImode, offset);
625 src = adjust_address (ops[1], V16QImode, offset);
626 emit_move_insn (dst, src);
628 if (offset < bytes)
630 rtx mask;
631 unsigned char arr[16] = { 0 };
632 for (i = 0; i < bytes - offset; i++)
633 arr[i] = 0xff;
634 dst = adjust_address (ops[0], V16QImode, offset);
635 src = adjust_address (ops[1], V16QImode, offset);
636 mask = gen_reg_rtx (V16QImode);
637 sreg = gen_reg_rtx (V16QImode);
638 dreg = gen_reg_rtx (V16QImode);
639 target = gen_reg_rtx (V16QImode);
640 emit_move_insn (mask, array_to_constant (V16QImode, arr));
641 emit_move_insn (dreg, dst);
642 emit_move_insn (sreg, src);
643 emit_insn (gen_selb (target, dreg, sreg, mask));
644 emit_move_insn (dst, target);
646 return 1;
648 return 0;
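/* A byte-wise model of the selb-based tail copy above: the bytes covered by
   the 0xff entries of the mask come from the source, the remaining bytes of
   the 16-byte quadword keep the destination's old contents.  Like the real
   sequence, this touches a full 16 bytes.  Sketch only, not part of the
   build.  */
#if 0
static void
example_tail_copy (unsigned char dst[16], const unsigned char src[16],
		   int remaining)
{
  unsigned char mask[16] = { 0 };
  int i;
  for (i = 0; i < remaining; i++)
    mask[i] = 0xff;
  for (i = 0; i < 16; i++)
    dst[i] = (src[i] & mask[i]) | (dst[i] & (unsigned char) ~mask[i]);
}
#endif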
651 enum spu_comp_code
652 { SPU_EQ, SPU_GT, SPU_GTU };
654 int spu_comp_icode[12][3] = {
655 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
656 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
657 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
658 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
659 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
660 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
661 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
662 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
663 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
664 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
665 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
666 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
669 /* Generate a compare for CODE. Return a brand-new rtx that represents
670 the result of the compare. GCC can figure this out too if we don't
671 provide all variations of compares, but since GCC always wants to use
672 WORD_MODE, we can generate better code in most cases if we do it
673 ourselves. */
674 void
675 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
677 int reverse_compare = 0;
678 int reverse_test = 0;
679 rtx compare_result, eq_result;
680 rtx comp_rtx, eq_rtx;
681 machine_mode comp_mode;
682 machine_mode op_mode;
683 enum spu_comp_code scode, eq_code;
684 enum insn_code ior_code;
685 enum rtx_code code = GET_CODE (cmp);
686 rtx op0 = XEXP (cmp, 0);
687 rtx op1 = XEXP (cmp, 1);
688 int index;
689 int eq_test = 0;
691 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
692 and so on, to keep the constant in operand 1. */
693 if (GET_CODE (op1) == CONST_INT)
695 HOST_WIDE_INT val = INTVAL (op1) - 1;
696 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
697 switch (code)
699 case GE:
700 op1 = GEN_INT (val);
701 code = GT;
702 break;
703 case LT:
704 op1 = GEN_INT (val);
705 code = LE;
706 break;
707 case GEU:
708 op1 = GEN_INT (val);
709 code = GTU;
710 break;
711 case LTU:
712 op1 = GEN_INT (val);
713 code = LEU;
714 break;
715 default:
716 break;
720 /* However, if we generate an integer result, performing a reverse test
721 would require an extra negation, so avoid that where possible. */
722 if (GET_CODE (op1) == CONST_INT && is_set == 1)
724 HOST_WIDE_INT val = INTVAL (op1) + 1;
725 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
726 switch (code)
728 case LE:
729 op1 = GEN_INT (val);
730 code = LT;
731 break;
732 case LEU:
733 op1 = GEN_INT (val);
734 code = LTU;
735 break;
736 default:
737 break;
741 comp_mode = SImode;
742 op_mode = GET_MODE (op0);
744 switch (code)
746 case GE:
747 scode = SPU_GT;
748 if (HONOR_NANS (op_mode))
750 reverse_compare = 0;
751 reverse_test = 0;
752 eq_test = 1;
753 eq_code = SPU_EQ;
755 else
757 reverse_compare = 1;
758 reverse_test = 1;
760 break;
761 case LE:
762 scode = SPU_GT;
763 if (HONOR_NANS (op_mode))
765 reverse_compare = 1;
766 reverse_test = 0;
767 eq_test = 1;
768 eq_code = SPU_EQ;
770 else
772 reverse_compare = 0;
773 reverse_test = 1;
775 break;
776 case LT:
777 reverse_compare = 1;
778 reverse_test = 0;
779 scode = SPU_GT;
780 break;
781 case GEU:
782 reverse_compare = 1;
783 reverse_test = 1;
784 scode = SPU_GTU;
785 break;
786 case LEU:
787 reverse_compare = 0;
788 reverse_test = 1;
789 scode = SPU_GTU;
790 break;
791 case LTU:
792 reverse_compare = 1;
793 reverse_test = 0;
794 scode = SPU_GTU;
795 break;
796 case NE:
797 reverse_compare = 0;
798 reverse_test = 1;
799 scode = SPU_EQ;
800 break;
802 case EQ:
803 scode = SPU_EQ;
804 break;
805 case GT:
806 scode = SPU_GT;
807 break;
808 case GTU:
809 scode = SPU_GTU;
810 break;
811 default:
812 scode = SPU_EQ;
813 break;
816 switch (op_mode)
818 case E_QImode:
819 index = 0;
820 comp_mode = QImode;
821 break;
822 case E_HImode:
823 index = 1;
824 comp_mode = HImode;
825 break;
826 case E_SImode:
827 index = 2;
828 break;
829 case E_DImode:
830 index = 3;
831 break;
832 case E_TImode:
833 index = 4;
834 break;
835 case E_SFmode:
836 index = 5;
837 break;
838 case E_DFmode:
839 index = 6;
840 break;
841 case E_V16QImode:
842 index = 7;
843 comp_mode = op_mode;
844 break;
845 case E_V8HImode:
846 index = 8;
847 comp_mode = op_mode;
848 break;
849 case E_V4SImode:
850 index = 9;
851 comp_mode = op_mode;
852 break;
853 case E_V4SFmode:
854 index = 10;
855 comp_mode = V4SImode;
856 break;
857 case E_V2DFmode:
858 index = 11;
859 comp_mode = V2DImode;
860 break;
861 case E_V2DImode:
862 default:
863 abort ();
866 if (GET_MODE (op1) == DFmode
867 && (scode != SPU_GT && scode != SPU_EQ))
868 abort ();
870 if (is_set == 0 && op1 == const0_rtx
871 && (GET_MODE (op0) == SImode
872 || GET_MODE (op0) == HImode
873 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
875 /* Don't need to set a register with the result when we are
876 comparing against zero and branching. */
877 reverse_test = !reverse_test;
878 compare_result = op0;
880 else
882 compare_result = gen_reg_rtx (comp_mode);
884 if (reverse_compare)
886 rtx t = op1;
887 op1 = op0;
888 op0 = t;
891 if (spu_comp_icode[index][scode] == 0)
892 abort ();
894 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
895 (op0, op_mode))
896 op0 = force_reg (op_mode, op0);
897 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
898 (op1, op_mode))
899 op1 = force_reg (op_mode, op1);
900 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
901 op0, op1);
902 if (comp_rtx == 0)
903 abort ();
904 emit_insn (comp_rtx);
906 if (eq_test)
908 eq_result = gen_reg_rtx (comp_mode);
909 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
910 op0, op1);
911 if (eq_rtx == 0)
912 abort ();
913 emit_insn (eq_rtx);
914 ior_code = optab_handler (ior_optab, comp_mode);
915 gcc_assert (ior_code != CODE_FOR_nothing);
916 emit_insn (GEN_FCN (ior_code)
917 (compare_result, compare_result, eq_result));
921 if (is_set == 0)
923 rtx bcomp;
924 rtx loc_ref;
926 /* We don't have branch on QI compare insns, so we convert the
927 QI compare result to a HI result. */
928 if (comp_mode == QImode)
930 rtx old_res = compare_result;
931 compare_result = gen_reg_rtx (HImode);
932 comp_mode = HImode;
933 emit_insn (gen_extendqihi2 (compare_result, old_res));
936 if (reverse_test)
937 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
938 else
939 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
941 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
942 emit_jump_insn (gen_rtx_SET (pc_rtx,
943 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
944 loc_ref, pc_rtx)));
946 else if (is_set == 2)
948 rtx target = operands[0];
949 int compare_size = GET_MODE_BITSIZE (comp_mode);
950 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
951 machine_mode mode = int_mode_for_size (target_size, 0).require ();
952 rtx select_mask;
953 rtx op_t = operands[2];
954 rtx op_f = operands[3];
956 /* The result of the comparison can be SI, HI or QI mode. Create a
957 mask based on that result. */
958 if (target_size > compare_size)
960 select_mask = gen_reg_rtx (mode);
961 emit_insn (gen_extend_compare (select_mask, compare_result));
963 else if (target_size < compare_size)
964 select_mask =
965 gen_rtx_SUBREG (mode, compare_result,
966 (compare_size - target_size) / BITS_PER_UNIT);
967 else if (comp_mode != mode)
968 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
969 else
970 select_mask = compare_result;
972 if (GET_MODE (target) != GET_MODE (op_t)
973 || GET_MODE (target) != GET_MODE (op_f))
974 abort ();
976 if (reverse_test)
977 emit_insn (gen_selb (target, op_t, op_f, select_mask));
978 else
979 emit_insn (gen_selb (target, op_f, op_t, select_mask));
981 else
983 rtx target = operands[0];
984 if (reverse_test)
985 emit_insn (gen_rtx_SET (compare_result,
986 gen_rtx_NOT (comp_mode, compare_result)));
987 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
988 emit_insn (gen_extendhisi2 (target, compare_result));
989 else if (GET_MODE (target) == SImode
990 && GET_MODE (compare_result) == QImode)
991 emit_insn (gen_extend_compare (target, compare_result));
992 else
993 emit_move_insn (target, compare_result);
997 HOST_WIDE_INT
998 const_double_to_hwint (rtx x)
1000 HOST_WIDE_INT val;
1001 if (GET_MODE (x) == SFmode)
1002 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
1003 else if (GET_MODE (x) == DFmode)
1005 long l[2];
1006 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
1007 val = l[0];
1008 val = (val << 32) | (l[1] & 0xffffffff);
1010 else
1011 abort ();
1012 return val;
rtx
1016 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1018 long tv[2];
1019 REAL_VALUE_TYPE rv;
1020 gcc_assert (mode == SFmode || mode == DFmode);
1022 if (mode == SFmode)
1023 tv[0] = (v << 32) >> 32;
1024 else if (mode == DFmode)
1026 tv[1] = (v << 32) >> 32;
1027 tv[0] = v >> 32;
1029 real_from_target (&rv, tv, mode);
1030 return const_double_from_real_value (rv, mode);
1033 void
1034 print_operand_address (FILE * file, register rtx addr)
1036 rtx reg;
1037 rtx offset;
1039 if (GET_CODE (addr) == AND
1040 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1041 && INTVAL (XEXP (addr, 1)) == -16)
1042 addr = XEXP (addr, 0);
1044 switch (GET_CODE (addr))
1046 case REG:
1047 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1048 break;
1050 case PLUS:
1051 reg = XEXP (addr, 0);
1052 offset = XEXP (addr, 1);
1053 if (GET_CODE (offset) == REG)
1055 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1056 reg_names[REGNO (offset)]);
1058 else if (GET_CODE (offset) == CONST_INT)
1060 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1061 INTVAL (offset), reg_names[REGNO (reg)]);
1063 else
1064 abort ();
1065 break;
1067 case CONST:
1068 case LABEL_REF:
1069 case SYMBOL_REF:
1070 case CONST_INT:
1071 output_addr_const (file, addr);
1072 break;
1074 default:
1075 debug_rtx (addr);
1076 abort ();
1080 void
1081 print_operand (FILE * file, rtx x, int code)
1083 machine_mode mode = GET_MODE (x);
1084 HOST_WIDE_INT val;
1085 unsigned char arr[16];
1086 int xcode = GET_CODE (x);
1087 int i, info;
1088 if (GET_MODE (x) == VOIDmode)
1089 switch (code)
1091 case 'L': /* 128 bits, signed */
1092 case 'm': /* 128 bits, signed */
1093 case 'T': /* 128 bits, signed */
1094 case 't': /* 128 bits, signed */
1095 mode = TImode;
1096 break;
1097 case 'K': /* 64 bits, signed */
1098 case 'k': /* 64 bits, signed */
1099 case 'D': /* 64 bits, signed */
1100 case 'd': /* 64 bits, signed */
1101 mode = DImode;
1102 break;
1103 case 'J': /* 32 bits, signed */
1104 case 'j': /* 32 bits, signed */
1105 case 's': /* 32 bits, signed */
1106 case 'S': /* 32 bits, signed */
1107 mode = SImode;
1108 break;
1110 switch (code)
1113 case 'j': /* 32 bits, signed */
1114 case 'k': /* 64 bits, signed */
1115 case 'm': /* 128 bits, signed */
1116 if (xcode == CONST_INT
1117 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1119 gcc_assert (logical_immediate_p (x, mode));
1120 constant_to_array (mode, x, arr);
1121 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1122 val = trunc_int_for_mode (val, SImode);
1123 switch (which_logical_immediate (val))
1125 case SPU_ORI:
1126 break;
1127 case SPU_ORHI:
1128 fprintf (file, "h");
1129 break;
1130 case SPU_ORBI:
1131 fprintf (file, "b");
1132 break;
1133 default:
1134 gcc_unreachable();
1137 else
1138 gcc_unreachable();
1139 return;
1141 case 'J': /* 32 bits, signed */
1142 case 'K': /* 64 bits, signed */
1143 case 'L': /* 128 bits, signed */
1144 if (xcode == CONST_INT
1145 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1147 gcc_assert (logical_immediate_p (x, mode)
1148 || iohl_immediate_p (x, mode));
1149 constant_to_array (mode, x, arr);
1150 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1151 val = trunc_int_for_mode (val, SImode);
1152 switch (which_logical_immediate (val))
1154 case SPU_ORI:
1155 case SPU_IOHL:
1156 break;
1157 case SPU_ORHI:
1158 val = trunc_int_for_mode (val, HImode);
1159 break;
1160 case SPU_ORBI:
1161 val = trunc_int_for_mode (val, QImode);
1162 break;
1163 default:
1164 gcc_unreachable();
1166 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1168 else
1169 gcc_unreachable();
1170 return;
1172 case 't': /* 128 bits, signed */
1173 case 'd': /* 64 bits, signed */
1174 case 's': /* 32 bits, signed */
1175 if (CONSTANT_P (x))
1177 enum immediate_class c = classify_immediate (x, mode);
1178 switch (c)
1180 case IC_IL1:
1181 constant_to_array (mode, x, arr);
1182 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1183 val = trunc_int_for_mode (val, SImode);
1184 switch (which_immediate_load (val))
1186 case SPU_IL:
1187 break;
1188 case SPU_ILA:
1189 fprintf (file, "a");
1190 break;
1191 case SPU_ILH:
1192 fprintf (file, "h");
1193 break;
1194 case SPU_ILHU:
1195 fprintf (file, "hu");
1196 break;
1197 default:
1198 gcc_unreachable ();
1200 break;
1201 case IC_CPAT:
1202 constant_to_array (mode, x, arr);
1203 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1204 if (info == 1)
1205 fprintf (file, "b");
1206 else if (info == 2)
1207 fprintf (file, "h");
1208 else if (info == 4)
1209 fprintf (file, "w");
1210 else if (info == 8)
1211 fprintf (file, "d");
1212 break;
1213 case IC_IL1s:
1214 if (xcode == CONST_VECTOR)
1216 x = CONST_VECTOR_ELT (x, 0);
1217 xcode = GET_CODE (x);
1219 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1220 fprintf (file, "a");
1221 else if (xcode == HIGH)
1222 fprintf (file, "hu");
1223 break;
1224 case IC_FSMBI:
1225 case IC_FSMBI2:
1226 case IC_IL2:
1227 case IC_IL2s:
1228 case IC_POOL:
1229 abort ();
1232 else
1233 gcc_unreachable ();
1234 return;
1236 case 'T': /* 128 bits, signed */
1237 case 'D': /* 64 bits, signed */
1238 case 'S': /* 32 bits, signed */
1239 if (CONSTANT_P (x))
1241 enum immediate_class c = classify_immediate (x, mode);
1242 switch (c)
1244 case IC_IL1:
1245 constant_to_array (mode, x, arr);
1246 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1247 val = trunc_int_for_mode (val, SImode);
1248 switch (which_immediate_load (val))
1250 case SPU_IL:
1251 case SPU_ILA:
1252 break;
1253 case SPU_ILH:
1254 case SPU_ILHU:
1255 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1256 break;
1257 default:
1258 gcc_unreachable ();
1260 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1261 break;
1262 case IC_FSMBI:
1263 constant_to_array (mode, x, arr);
1264 val = 0;
1265 for (i = 0; i < 16; i++)
1267 val <<= 1;
1268 val |= arr[i] & 1;
1270 print_operand (file, GEN_INT (val), 0);
1271 break;
1272 case IC_CPAT:
1273 constant_to_array (mode, x, arr);
1274 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1275 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1276 break;
1277 case IC_IL1s:
1278 if (xcode == HIGH)
1279 x = XEXP (x, 0);
1280 if (GET_CODE (x) == CONST_VECTOR)
1281 x = CONST_VECTOR_ELT (x, 0);
1282 output_addr_const (file, x);
1283 if (xcode == HIGH)
1284 fprintf (file, "@h");
1285 break;
1286 case IC_IL2:
1287 case IC_IL2s:
1288 case IC_FSMBI2:
1289 case IC_POOL:
1290 abort ();
1293 else
1294 gcc_unreachable ();
1295 return;
1297 case 'C':
1298 if (xcode == CONST_INT)
1300 /* Only the 4 least significant bits are relevant for generating
1301 control word instructions. */
1302 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1303 return;
1305 break;
1307 case 'M': /* print code for c*d */
1308 if (GET_CODE (x) == CONST_INT)
1309 switch (INTVAL (x))
1311 case 1:
1312 fprintf (file, "b");
1313 break;
1314 case 2:
1315 fprintf (file, "h");
1316 break;
1317 case 4:
1318 fprintf (file, "w");
1319 break;
1320 case 8:
1321 fprintf (file, "d");
1322 break;
1323 default:
1324 gcc_unreachable();
1326 else
1327 gcc_unreachable();
1328 return;
1330 case 'N': /* Negate the operand */
1331 if (xcode == CONST_INT)
1332 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1333 else if (xcode == CONST_VECTOR)
1334 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1335 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1336 return;
1338 case 'I': /* enable/disable interrupts */
1339 if (xcode == CONST_INT)
1340 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1341 return;
1343 case 'b': /* branch modifiers */
1344 if (xcode == REG)
1345 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1346 else if (COMPARISON_P (x))
1347 fprintf (file, "%s", xcode == NE ? "n" : "");
1348 return;
1350 case 'i': /* indirect call */
1351 if (xcode == MEM)
1353 if (GET_CODE (XEXP (x, 0)) == REG)
1354 /* Used in indirect function calls. */
1355 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1356 else
1357 output_address (GET_MODE (x), XEXP (x, 0));
1359 return;
1361 case 'p': /* load/store */
1362 if (xcode == MEM)
1364 x = XEXP (x, 0);
1365 xcode = GET_CODE (x);
1367 if (xcode == AND)
1369 x = XEXP (x, 0);
1370 xcode = GET_CODE (x);
1372 if (xcode == REG)
1373 fprintf (file, "d");
1374 else if (xcode == CONST_INT)
1375 fprintf (file, "a");
1376 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1377 fprintf (file, "r");
1378 else if (xcode == PLUS || xcode == LO_SUM)
1380 if (GET_CODE (XEXP (x, 1)) == REG)
1381 fprintf (file, "x");
1382 else
1383 fprintf (file, "d");
1385 return;
1387 case 'e':
1388 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1389 val &= 0x7;
1390 output_addr_const (file, GEN_INT (val));
1391 return;
1393 case 'f':
1394 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1395 val &= 0x1f;
1396 output_addr_const (file, GEN_INT (val));
1397 return;
1399 case 'g':
1400 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1401 val &= 0x3f;
1402 output_addr_const (file, GEN_INT (val));
1403 return;
1405 case 'h':
1406 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1407 val = (val >> 3) & 0x1f;
1408 output_addr_const (file, GEN_INT (val));
1409 return;
1411 case 'E':
1412 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1413 val = -val;
1414 val &= 0x7;
1415 output_addr_const (file, GEN_INT (val));
1416 return;
1418 case 'F':
1419 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1420 val = -val;
1421 val &= 0x1f;
1422 output_addr_const (file, GEN_INT (val));
1423 return;
1425 case 'G':
1426 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1427 val = -val;
1428 val &= 0x3f;
1429 output_addr_const (file, GEN_INT (val));
1430 return;
1432 case 'H':
1433 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1434 val = -(val & -8ll);
1435 val = (val >> 3) & 0x1f;
1436 output_addr_const (file, GEN_INT (val));
1437 return;
1439 case 'v':
1440 case 'w':
1441 constant_to_array (mode, x, arr);
1442 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1443 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1444 return;
1446 case 0:
1447 if (xcode == REG)
1448 fprintf (file, "%s", reg_names[REGNO (x)]);
1449 else if (xcode == MEM)
1450 output_address (GET_MODE (x), XEXP (x, 0));
1451 else if (xcode == CONST_VECTOR)
1452 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1453 else
1454 output_addr_const (file, x);
1455 return;
1457 /* unused letters
1458 o qr u yz
1459 AB OPQR UVWXYZ */
1460 default:
1461 output_operand_lossage ("invalid %%xn code");
1463 gcc_unreachable ();
1466 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1467 caller saved register. For leaf functions it is more efficient to
1468 use a volatile register because we won't need to save and restore the
1469 pic register. This routine is only valid after register allocation
1470 is completed, so we can pick an unused register. */
1471 static rtx
1472 get_pic_reg (void)
1474 if (!reload_completed && !reload_in_progress)
1475 abort ();
1477 /* If we've already made the decision, we need to keep with it. Once we've
1478 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1479 return true since the register is now live; this should not cause us to
1480 "switch back" to using pic_offset_table_rtx. */
1481 if (!cfun->machine->pic_reg)
1483 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1484 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1485 else
1486 cfun->machine->pic_reg = pic_offset_table_rtx;
1489 return cfun->machine->pic_reg;
1492 /* Split constant addresses to handle cases that are too large.
1493 Add in the pic register when in PIC mode.
1494 Split immediates that require more than 1 instruction. */
int
1496 spu_split_immediate (rtx * ops)
1498 machine_mode mode = GET_MODE (ops[0]);
1499 enum immediate_class c = classify_immediate (ops[1], mode);
1501 switch (c)
1503 case IC_IL2:
1505 unsigned char arrhi[16];
1506 unsigned char arrlo[16];
1507 rtx to, temp, hi, lo;
1508 int i;
1509 /* We need to do reals as ints because the constant used in the
1510 IOR might not be a legitimate real constant. */
1511 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1512 constant_to_array (mode, ops[1], arrhi);
1513 if (imode != mode)
1514 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1515 else
1516 to = ops[0];
1517 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1518 for (i = 0; i < 16; i += 4)
1520 arrlo[i + 2] = arrhi[i + 2];
1521 arrlo[i + 3] = arrhi[i + 3];
1522 arrlo[i + 0] = arrlo[i + 1] = 0;
1523 arrhi[i + 2] = arrhi[i + 3] = 0;
1525 hi = array_to_constant (imode, arrhi);
1526 lo = array_to_constant (imode, arrlo);
1527 emit_move_insn (temp, hi);
1528 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1529 return 1;
1531 case IC_FSMBI2:
1533 unsigned char arr_fsmbi[16];
1534 unsigned char arr_andbi[16];
1535 rtx to, reg_fsmbi, reg_and;
1536 int i;
1537 /* We need to do reals as ints because the constant used in the
1538 * AND might not be a legitimate real constant. */
1539 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1540 constant_to_array (mode, ops[1], arr_fsmbi);
1541 if (imode != mode)
1542 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1543 else
1544 to = ops[0];
1545 for (i = 0; i < 16; i++)
1546 if (arr_fsmbi[i] != 0)
1548 arr_andbi[0] = arr_fsmbi[i];
1549 arr_fsmbi[i] = 0xff;
1551 for (i = 1; i < 16; i++)
1552 arr_andbi[i] = arr_andbi[0];
1553 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1554 reg_and = array_to_constant (imode, arr_andbi);
1555 emit_move_insn (to, reg_fsmbi);
1556 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1557 return 1;
1559 case IC_POOL:
1560 if (reload_in_progress || reload_completed)
1562 rtx mem = force_const_mem (mode, ops[1]);
1563 if (TARGET_LARGE_MEM)
1565 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1566 emit_move_insn (addr, XEXP (mem, 0));
1567 mem = replace_equiv_address (mem, addr);
1569 emit_move_insn (ops[0], mem);
1570 return 1;
1572 break;
1573 case IC_IL1s:
1574 case IC_IL2s:
1575 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1577 if (c == IC_IL2s)
1579 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1580 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1582 else if (flag_pic)
1583 emit_insn (gen_pic (ops[0], ops[1]));
1584 if (flag_pic)
1586 rtx pic_reg = get_pic_reg ();
1587 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1589 return flag_pic || c == IC_IL2s;
1591 break;
1592 case IC_IL1:
1593 case IC_FSMBI:
1594 case IC_CPAT:
1595 break;
1597 return 0;
1600 /* SAVING is TRUE when we are generating the actual load and store
1601 instructions for REGNO. When determining the size of the stack
1602 needed for saving register we must allocate enough space for the
1603 worst case, because we don't always have the information early enough
1604 to not allocate it. But we can at least eliminate the actual loads
1605 and stores during the prologue/epilogue. */
1606 static int
1607 need_to_save_reg (int regno, int saving)
1609 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1610 return 1;
1611 if (flag_pic
1612 && regno == PIC_OFFSET_TABLE_REGNUM
1613 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1614 return 1;
1615 return 0;
1618 /* This function is only correct starting with local register
1619 allocation */
int
1621 spu_saved_regs_size (void)
1623 int reg_save_size = 0;
1624 int regno;
1626 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1627 if (need_to_save_reg (regno, 0))
1628 reg_save_size += 0x10;
1629 return reg_save_size;
1632 static rtx_insn *
1633 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1635 rtx reg = gen_rtx_REG (V4SImode, regno);
1636 rtx mem =
1637 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1638 return emit_insn (gen_movv4si (mem, reg));
1641 static rtx_insn *
1642 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1644 rtx reg = gen_rtx_REG (V4SImode, regno);
1645 rtx mem =
1646 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1647 return emit_insn (gen_movv4si (reg, mem));
1650 /* This happens after reload, so we need to expand it. */
1651 static rtx_insn *
1652 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1654 rtx_insn *insn;
1655 if (satisfies_constraint_K (GEN_INT (imm)))
1657 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1659 else
1661 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1662 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1663 if (REGNO (src) == REGNO (scratch))
1664 abort ();
1666 return insn;
1669 /* Return nonzero if this function is known to have a null epilogue. */
int
1672 direct_return (void)
1674 if (reload_completed)
1676 if (cfun->static_chain_decl == 0
1677 && (spu_saved_regs_size ()
1678 + get_frame_size ()
1679 + crtl->outgoing_args_size
1680 + crtl->args.pretend_args_size == 0)
1681 && crtl->is_leaf)
1682 return 1;
1684 return 0;
/*
1688 The stack frame looks like this:
1689 +-------------+
1690 | incoming |
1691 | args |
1692 AP -> +-------------+
1693 | $lr save |
1694 +-------------+
1695 prev SP | back chain |
1696 +-------------+
1697 | var args |
1698 | reg save | crtl->args.pretend_args_size bytes
1699 +-------------+
1700 | ... |
1701 | saved regs | spu_saved_regs_size() bytes
1702 FP -> +-------------+
1703 | ... |
1704 | vars | get_frame_size() bytes
1705 HFP -> +-------------+
1706 | ... |
1707 | outgoing |
1708 | args | crtl->outgoing_args_size bytes
1709 +-------------+
1710 | $lr of next |
1711 | frame |
1712 +-------------+
1713 | back chain |
1714 SP -> +-------------+
*/
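/* A sketch of the frame-size bookkeeping the prologue below performs for
   the layout drawn above.  The parameter names stand in for get_frame_size,
   spu_saved_regs_size and the crtl fields; 32 is the assumed value of
   STACK_POINTER_OFFSET (back chain plus $lr slot).  Not part of the build. */
#if 0
static long
example_total_frame_size (long vars, long saved_regs, long outgoing_args,
			  long pretend_args, int is_leaf, int calls_alloca)
{
  long total = vars + saved_regs + outgoing_args + pretend_args;
  /* Non-leaf functions, alloca users and any non-empty frame also reserve
     the fixed area at the bottom of the frame.  */
  if (!is_leaf || calls_alloca || total > 0)
    total += 32;
  return total;
}
#endif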
1717 void
1718 spu_expand_prologue (void)
1720 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1721 HOST_WIDE_INT total_size;
1722 HOST_WIDE_INT saved_regs_size;
1723 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1724 rtx scratch_reg_0, scratch_reg_1;
1725 rtx_insn *insn;
1726 rtx real;
1728 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1729 cfun->machine->pic_reg = pic_offset_table_rtx;
1731 if (spu_naked_function_p (current_function_decl))
1732 return;
1734 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1735 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1737 saved_regs_size = spu_saved_regs_size ();
1738 total_size = size + saved_regs_size
1739 + crtl->outgoing_args_size
1740 + crtl->args.pretend_args_size;
1742 if (!crtl->is_leaf
1743 || cfun->calls_alloca || total_size > 0)
1744 total_size += STACK_POINTER_OFFSET;
1746 /* Save this first because code after this might use the link
1747 register as a scratch register. */
1748 if (!crtl->is_leaf)
1750 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1751 RTX_FRAME_RELATED_P (insn) = 1;
1754 if (total_size > 0)
1756 offset = -crtl->args.pretend_args_size;
1757 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1758 if (need_to_save_reg (regno, 1))
1760 offset -= 16;
1761 insn = frame_emit_store (regno, sp_reg, offset);
1762 RTX_FRAME_RELATED_P (insn) = 1;
1766 if (flag_pic && cfun->machine->pic_reg)
1768 rtx pic_reg = cfun->machine->pic_reg;
1769 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1770 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1773 if (total_size > 0)
1775 if (flag_stack_check || flag_stack_clash_protection)
1777 /* We compare against total_size-1 because
1778 ($sp >= total_size) <=> ($sp > total_size-1) */
1779 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1780 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1781 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1782 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1784 emit_move_insn (scratch_v4si, size_v4si);
1785 size_v4si = scratch_v4si;
1787 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1788 emit_insn (gen_vec_extractv4sisi
1789 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1790 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1793 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1794 the value of the previous $sp because we save it as the back
1795 chain. */
1796 if (total_size <= 2000)
1798 /* In this case we save the back chain first. */
1799 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1800 insn =
1801 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1803 else
1805 insn = emit_move_insn (scratch_reg_0, sp_reg);
1806 insn =
1807 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1809 RTX_FRAME_RELATED_P (insn) = 1;
1810 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1811 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1813 if (total_size > 2000)
1815 /* Save the back chain ptr */
1816 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1819 if (frame_pointer_needed)
1821 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1822 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1823 + crtl->outgoing_args_size;
1824 /* Set the new frame_pointer */
1825 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1826 RTX_FRAME_RELATED_P (insn) = 1;
1827 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1828 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1829 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1833 if (flag_stack_usage_info)
1834 current_function_static_stack_size = total_size;
1837 void
1838 spu_expand_epilogue (bool sibcall_p)
1840 int size = get_frame_size (), offset, regno;
1841 HOST_WIDE_INT saved_regs_size, total_size;
1842 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1843 rtx scratch_reg_0;
1845 if (spu_naked_function_p (current_function_decl))
1846 return;
1848 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1850 saved_regs_size = spu_saved_regs_size ();
1851 total_size = size + saved_regs_size
1852 + crtl->outgoing_args_size
1853 + crtl->args.pretend_args_size;
1855 if (!crtl->is_leaf
1856 || cfun->calls_alloca || total_size > 0)
1857 total_size += STACK_POINTER_OFFSET;
1859 if (total_size > 0)
1861 if (cfun->calls_alloca)
1862 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1863 else
1864 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1867 if (saved_regs_size > 0)
1869 offset = -crtl->args.pretend_args_size;
1870 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1871 if (need_to_save_reg (regno, 1))
1873 offset -= 0x10;
1874 frame_emit_load (regno, sp_reg, offset);
1879 if (!crtl->is_leaf)
1880 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1882 if (!sibcall_p)
1884 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1885 emit_jump_insn (gen__return ());
rtx
1890 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1892 if (count != 0)
1893 return 0;
1894 /* This is inefficient because it ends up copying to a save-register
1895 which then gets saved even though $lr has already been saved. But
1896 it does generate better code for leaf functions and we don't need
1897 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1898 used for __builtin_return_address anyway, so maybe we don't care if
1899 it's inefficient. */
1900 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1904 /* Given VAL, generate a constant appropriate for MODE.
1905 If MODE is a vector mode, every element will be VAL.
1906 For TImode, VAL will be zero extended to 128 bits. */
rtx
1908 spu_const (machine_mode mode, HOST_WIDE_INT val)
1910 rtx inner;
1912 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1913 || GET_MODE_CLASS (mode) == MODE_FLOAT
1914 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1915 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1917 if (GET_MODE_CLASS (mode) == MODE_INT)
1918 return immed_double_const (val, 0, mode);
1920 /* val is the bit representation of the float */
1921 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1922 return hwint_to_const_double (mode, val);
1924 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1925 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1926 else
1927 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1929 return gen_const_vec_duplicate (mode, inner);
1932 /* Create a MODE vector constant from 4 ints. */
rtx
1934 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1936 unsigned char arr[16];
1937 arr[0] = (a >> 24) & 0xff;
1938 arr[1] = (a >> 16) & 0xff;
1939 arr[2] = (a >> 8) & 0xff;
1940 arr[3] = (a >> 0) & 0xff;
1941 arr[4] = (b >> 24) & 0xff;
1942 arr[5] = (b >> 16) & 0xff;
1943 arr[6] = (b >> 8) & 0xff;
1944 arr[7] = (b >> 0) & 0xff;
1945 arr[8] = (c >> 24) & 0xff;
1946 arr[9] = (c >> 16) & 0xff;
1947 arr[10] = (c >> 8) & 0xff;
1948 arr[11] = (c >> 0) & 0xff;
1949 arr[12] = (d >> 24) & 0xff;
1950 arr[13] = (d >> 16) & 0xff;
1951 arr[14] = (d >> 8) & 0xff;
1952 arr[15] = (d >> 0) & 0xff;
1953 return array_to_constant(mode, arr);
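/* The byte layout spu_const_from_ints builds: four 32-bit words stored
   big-endian, word A in bytes 0-3 through word D in bytes 12-15.  A compact
   restatement of the same packing; sketch only, not part of the build.  */
#if 0
static void
example_pack_words_be (unsigned char out[16], const unsigned int w[4])
{
  int i;
  for (i = 0; i < 4; i++)
    {
      out[4 * i + 0] = (w[i] >> 24) & 0xff;
      out[4 * i + 1] = (w[i] >> 16) & 0xff;
      out[4 * i + 2] = (w[i] >> 8) & 0xff;
      out[4 * i + 3] = w[i] & 0xff;
    }
}
#endif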
1956 /* branch hint stuff */
1958 /* An array of these is used to propagate hints to predecessor blocks. */
1959 struct spu_bb_info
{
1961 rtx_insn *prop_jump; /* propagated from another block */
1962 int bb_index; /* the original block. */
};
1964 static struct spu_bb_info *spu_bb_info;
1966 #define STOP_HINT_P(INSN) \
1967 (CALL_P(INSN) \
1968 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1969 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1971 /* 1 when RTX is a hinted branch or its target. We keep track of
1972 what has been hinted so the safe-hint code can test it easily. */
1973 #define HINTED_P(RTX) \
1974 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1976 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1977 #define SCHED_ON_EVEN_P(RTX) \
1978 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1980 /* Emit a nop for INSN such that the two will dual issue. This assumes
1981 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1982 We check for TImode to handle a MULTI1 insn which has dual issued its
1983 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1984 static void
1985 emit_nop_for_insn (rtx_insn *insn)
1987 int p;
1988 rtx_insn *new_insn;
1990 /* We need to handle JUMP_TABLE_DATA separately. */
1991 if (JUMP_TABLE_DATA_P (insn))
1993 new_insn = emit_insn_after (gen_lnop(), insn);
1994 recog_memoized (new_insn);
1995 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1996 return;
1999 p = get_pipe (insn);
2000 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2001 new_insn = emit_insn_after (gen_lnop (), insn);
2002 else if (p == 1 && GET_MODE (insn) == TImode)
2004 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2005 PUT_MODE (new_insn, TImode);
2006 PUT_MODE (insn, VOIDmode);
2008 else
2009 new_insn = emit_insn_after (gen_lnop (), insn);
2010 recog_memoized (new_insn);
2011 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2014 /* Insert nops in basic blocks to meet dual issue alignment
2015 requirements. Also make sure hbrp and hint instructions are at least
2016 one cycle apart, possibly inserting a nop. */
2017 static void
2018 pad_bb(void)
2020 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2021 int length;
2022 int addr;
2024 /* This sets up INSN_ADDRESSES. */
2025 shorten_branches (get_insns ());
2027 /* Keep track of length added by nops. */
2028 length = 0;
2030 prev_insn = 0;
2031 insn = get_insns ();
2032 if (!active_insn_p (insn))
2033 insn = next_active_insn (insn);
2034 for (; insn; insn = next_insn)
2036 next_insn = next_active_insn (insn);
2037 if (INSN_P (insn)
2038 && (INSN_CODE (insn) == CODE_FOR_iprefetch
2039 || INSN_CODE (insn) == CODE_FOR_hbr))
2041 if (hbr_insn)
2043 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2044 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2045 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2046 || (a1 - a0 == 4))
2048 prev_insn = emit_insn_before (gen_lnop (), insn);
2049 PUT_MODE (prev_insn, GET_MODE (insn));
2050 PUT_MODE (insn, TImode);
2051 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2052 length += 4;
2055 hbr_insn = insn;
2057 if (INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2059 if (GET_MODE (insn) == TImode)
2060 PUT_MODE (next_insn, TImode);
2061 insn = next_insn;
2062 next_insn = next_active_insn (insn);
2064 addr = INSN_ADDRESSES (INSN_UID (insn));
2065 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2067 if (((addr + length) & 7) != 0)
2069 emit_nop_for_insn (prev_insn);
2070 length += 4;
2073 else if (GET_MODE (insn) == TImode
2074 && ((next_insn && GET_MODE (next_insn) != TImode)
2075 || get_attr_type (insn) == TYPE_MULTI0)
2076 && ((addr + length) & 7) != 0)
2078 /* prev_insn will always be set because the first insn is
2079 always 8-byte aligned. */
2080 emit_nop_for_insn (prev_insn);
2081 length += 4;
2083 prev_insn = insn;
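/* The alignment test pad_bb applies above, restated: an insn that must be
   the first of a dual-issued pair has to start on an 8-byte boundary, so a
   4-byte nop is inserted whenever its address, adjusted for the nops already
   added, is not a multiple of 8.  Sketch only, not part of the build.  */
#if 0
static int
example_needs_pad_nop (int insn_addr, int nop_bytes_added)
{
  return ((insn_addr + nop_bytes_added) & 7) != 0;
}
#endif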
2088 /* Routines for branch hints. */
2090 static void
2091 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2092 int distance, sbitmap blocks)
2094 rtx_insn *hint;
2095 rtx_insn *insn;
2096 rtx_jump_table_data *table;
2098 if (before == 0 || branch == 0 || target == 0)
2099 return;
2101 /* While scheduling we require hints to be no further than 600 bytes away,
2102 so we need to enforce that here too. */
2103 if (distance > 600)
2104 return;
2106 /* If BEFORE is a basic block note, emit the hint after the note. */
2107 if (NOTE_INSN_BASIC_BLOCK_P (before))
2108 before = NEXT_INSN (before);
2110 rtx_code_label *branch_label = gen_label_rtx ();
2111 LABEL_NUSES (branch_label)++;
2112 LABEL_PRESERVE_P (branch_label) = 1;
2113 insn = emit_label_before (branch_label, branch);
2114 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2115 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2117 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2118 recog_memoized (hint);
2119 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2120 HINTED_P (branch) = 1;
2122 if (GET_CODE (target) == LABEL_REF)
2123 HINTED_P (XEXP (target, 0)) = 1;
2124 else if (tablejump_p (branch, 0, &table))
2126 rtvec vec;
2127 int j;
2128 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2129 vec = XVEC (PATTERN (table), 0);
2130 else
2131 vec = XVEC (PATTERN (table), 1);
2132 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2133 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2136 if (distance >= 588)
2138 /* Make sure the hint isn't scheduled any earlier than this point,
2139 which could make it too far for the branch offset to fit. */
2140 insn = emit_insn_before (gen_blockage (), hint);
2141 recog_memoized (insn);
2142 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2144 else if (distance <= 8 * 4)
2146 /* To guarantee at least 8 insns between the hint and branch we
2147 insert nops. */
2148 int d;
2149 for (d = distance; d < 8 * 4; d += 4)
2151 insn =
2152 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2153 recog_memoized (insn);
2154 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2157 /* Make sure any nops inserted aren't scheduled before the hint. */
2158 insn = emit_insn_after (gen_blockage (), hint);
2159 recog_memoized (insn);
2160 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2162 /* Make sure any nops inserted aren't scheduled after the call. */
2163 if (CALL_P (branch) && distance < 8 * 4)
2165 insn = emit_insn_before (gen_blockage (), branch);
2166 recog_memoized (insn);
2167 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2172 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2173 the rtx for the branch target. */
2174 static rtx
2175 get_branch_target (rtx_insn *branch)
2177 if (JUMP_P (branch))
2179 rtx set, src;
2181 /* Return statements */
2182 if (GET_CODE (PATTERN (branch)) == RETURN)
2183 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2185 /* ASM GOTOs. */
2186 if (extract_asm_operands (PATTERN (branch)) != NULL)
2187 return NULL;
2189 set = single_set (branch);
2190 src = SET_SRC (set);
2191 if (GET_CODE (SET_DEST (set)) != PC)
2192 abort ();
2194 if (GET_CODE (src) == IF_THEN_ELSE)
2196 rtx lab = 0;
2197 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2198 if (note)
2200 /* If the more probable case is not a fall through, then
2201 try a branch hint. */
2202 int prob = profile_probability::from_reg_br_prob_note
2203 (XINT (note, 0)).to_reg_br_prob_base ();
2204 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2205 && GET_CODE (XEXP (src, 1)) != PC)
2206 lab = XEXP (src, 1);
2207 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2208 && GET_CODE (XEXP (src, 2)) != PC)
2209 lab = XEXP (src, 2);
2211 if (lab)
2213 if (GET_CODE (lab) == RETURN)
2214 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2215 return lab;
2217 return 0;
2220 return src;
2222 else if (CALL_P (branch))
2224 rtx call;
2225 /* All of our call patterns are in a PARALLEL and the CALL is
2226 the first pattern in the PARALLEL. */
2227 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2228 abort ();
2229 call = XVECEXP (PATTERN (branch), 0, 0);
2230 if (GET_CODE (call) == SET)
2231 call = SET_SRC (call);
2232 if (GET_CODE (call) != CALL)
2233 abort ();
2234 return XEXP (XEXP (call, 0), 0);
2236 return 0;
2239 /* The special $hbr register is used to prevent the insn scheduler from
2240 moving hbr insns across instructions which invalidate them. It
2241 should only be used in a clobber, and this function searches for
2242 insns which clobber it. */
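/* As a sketch, the shape this function looks for is roughly

     (parallel [(set ...)
                (clobber (reg HBR_REGNUM))])

   i.e. only an explicit CLOBBER of $hbr inside a PARALLEL counts; a
   plain use or set of the register does not.  */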
2243 static bool
2244 insn_clobbers_hbr (rtx_insn *insn)
2246 if (INSN_P (insn)
2247 && GET_CODE (PATTERN (insn)) == PARALLEL)
2249 rtx parallel = PATTERN (insn);
2250 rtx clobber;
2251 int j;
2252 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2254 clobber = XVECEXP (parallel, 0, j);
2255 if (GET_CODE (clobber) == CLOBBER
2256 && GET_CODE (XEXP (clobber, 0)) == REG
2257 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2258 return 1;
2261 return 0;
2264 /* Search up to 32 insns starting at FIRST:
2265 - at any kind of hinted branch, just return
2266 - at any unconditional branch in the first 15 insns, just return
2267 - at a call or indirect branch, after the first 15 insns, force it to
2268 an even address and return
2269 - at any unconditional branch, after the first 15 insns, force it to
2270 an even address.
2271 At the end of the search, insert an hbrp within 4 insns of FIRST,
2272 and an hbrp within 16 instructions of FIRST. */
2274 static void
2275 insert_hbrp_for_ilb_runout (rtx_insn *first)
2277 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2278 int addr = 0, length, first_addr = -1;
2279 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2280 int insert_lnop_after = 0;
2281 for (insn = first; insn; insn = NEXT_INSN (insn))
2282 if (INSN_P (insn))
2284 if (first_addr == -1)
2285 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2286 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2287 length = get_attr_length (insn);
2289 if (before_4 == 0 && addr + length >= 4 * 4)
2290 before_4 = insn;
2291 /* We test for 14 instructions because the first hbrp will add
2292 up to 2 instructions. */
2293 if (before_16 == 0 && addr + length >= 14 * 4)
2294 before_16 = insn;
2296 if (INSN_CODE (insn) == CODE_FOR_hbr)
2298 /* Make sure an hbrp is at least 2 cycles away from a hint.
2299 Insert an lnop after the hbrp when necessary. */
2300 if (before_4 == 0 && addr > 0)
2302 before_4 = insn;
2303 insert_lnop_after |= 1;
2305 else if (before_4 && addr <= 4 * 4)
2306 insert_lnop_after |= 1;
2307 if (before_16 == 0 && addr > 10 * 4)
2309 before_16 = insn;
2310 insert_lnop_after |= 2;
2312 else if (before_16 && addr <= 14 * 4)
2313 insert_lnop_after |= 2;
2316 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2318 if (addr < hbrp_addr0)
2319 hbrp_addr0 = addr;
2320 else if (addr < hbrp_addr1)
2321 hbrp_addr1 = addr;
2324 if (CALL_P (insn) || JUMP_P (insn))
2326 if (HINTED_P (insn))
2327 return;
2329 /* Any branch after the first 15 insns should be on an even
2330 address to avoid a special case branch. There might be
2331 some nops and/or hbrps inserted, so we test after 10
2332 insns. */
2333 if (addr > 10 * 4)
2334 SCHED_ON_EVEN_P (insn) = 1;
2337 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2338 return;
2341 if (addr + length >= 32 * 4)
2343 gcc_assert (before_4 && before_16);
2344 if (hbrp_addr0 > 4 * 4)
2346 insn =
2347 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2348 recog_memoized (insn);
2349 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2350 INSN_ADDRESSES_NEW (insn,
2351 INSN_ADDRESSES (INSN_UID (before_4)));
2352 PUT_MODE (insn, GET_MODE (before_4));
2353 PUT_MODE (before_4, TImode);
2354 if (insert_lnop_after & 1)
2356 insn = emit_insn_before (gen_lnop (), before_4);
2357 recog_memoized (insn);
2358 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2359 INSN_ADDRESSES_NEW (insn,
2360 INSN_ADDRESSES (INSN_UID (before_4)));
2361 PUT_MODE (insn, TImode);
2364 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2365 && hbrp_addr1 > 16 * 4)
2367 insn =
2368 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2369 recog_memoized (insn);
2370 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2371 INSN_ADDRESSES_NEW (insn,
2372 INSN_ADDRESSES (INSN_UID (before_16)));
2373 PUT_MODE (insn, GET_MODE (before_16));
2374 PUT_MODE (before_16, TImode);
2375 if (insert_lnop_after & 2)
2377 insn = emit_insn_before (gen_lnop (), before_16);
2378 recog_memoized (insn);
2379 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2380 INSN_ADDRESSES_NEW (insn,
2381 INSN_ADDRESSES (INSN_UID
2382 (before_16)));
2383 PUT_MODE (insn, TImode);
2386 return;
2389 else if (BARRIER_P (insn))
2390 return;
2394 /* The SPU might hang when it executes 48 inline instructions after a
2395 hinted branch jumps to its hinted target. The beginning of a
2396 function and the return from a call might have been hinted, and
2397 must be handled as well. To prevent a hang we insert 2 hbrps. The
2398 first should be within 6 insns of the branch target. The second
2399 should be within 22 insns of the branch target. When determining
2400 if hbrps are necessary, we look for only 32 inline instructions,
2401 because up to 12 nops and 4 hbrps could be inserted. Similarly,
2402 when inserting new hbrps, we insert them within 4 and 16 insns of
2403 the target. */
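/* A quick check on the arithmetic above: the 32 inline insns we scan,
   plus up to 12 nops and 4 hbrps that may be inserted later, account
   for the 48-instruction window mentioned above.  */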
2404 static void
2405 insert_hbrp (void)
2407 rtx_insn *insn;
2408 if (TARGET_SAFE_HINTS)
2410 shorten_branches (get_insns ());
2411 /* Insert hbrp at beginning of function */
2412 insn = next_active_insn (get_insns ());
2413 if (insn)
2414 insert_hbrp_for_ilb_runout (insn);
2415 /* Insert hbrp after hinted targets. */
2416 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2417 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2418 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2422 static int in_spu_reorg;
2424 static void
2425 spu_var_tracking (void)
2427 if (flag_var_tracking)
2429 df_analyze ();
2430 timevar_push (TV_VAR_TRACKING);
2431 variable_tracking_main ();
2432 timevar_pop (TV_VAR_TRACKING);
2433 df_finish_pass (false);
2437 /* Insert branch hints. There are no branch optimizations after this
2438 pass, so it's safe to set our branch hints now. */
2439 static void
2440 spu_machine_dependent_reorg (void)
2442 sbitmap blocks;
2443 basic_block bb;
2444 rtx_insn *branch, *insn;
2445 rtx branch_target = 0;
2446 int branch_addr = 0, insn_addr, required_dist = 0;
2447 int i;
2448 unsigned int j;
2450 if (!TARGET_BRANCH_HINTS || optimize == 0)
2452 /* We still do it for unoptimized code because an external
2453 function might have hinted a call or return. */
2454 compute_bb_for_insn ();
2455 insert_hbrp ();
2456 pad_bb ();
2457 spu_var_tracking ();
2458 free_bb_for_insn ();
2459 return;
2462 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2463 bitmap_clear (blocks);
2465 in_spu_reorg = 1;
2466 compute_bb_for_insn ();
2468 /* (Re-)discover loops so that bb->loop_father can be used
2469 in the analysis below. */
2470 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2472 compact_blocks ();
2474 spu_bb_info =
2475 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2476 sizeof (struct spu_bb_info));
2478 /* We need exact insn addresses and lengths. */
2479 shorten_branches (get_insns ());
2481 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2483 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2484 branch = 0;
2485 if (spu_bb_info[i].prop_jump)
2487 branch = spu_bb_info[i].prop_jump;
2488 branch_target = get_branch_target (branch);
2489 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2490 required_dist = spu_hint_dist;
2492 /* Search from the end of a block to the beginning. In this loop, find
2493 branches which need a hint and emit the hint only when:
2494 - it's an indirect branch and we're at the insn which sets
2495 the register
2496 - we're at an insn that will invalidate the hint. e.g., a
2497 call, another hint insn, inline asm that clobbers $hbr, and
2498 some inlined operations (divmodsi4). Don't consider jumps
2499 because they are only at the end of a block and are
2500 considered when we are deciding whether to propagate
2501 - we're getting too far away from the branch. The hbr insns
2502 only have a signed 10 bit offset
2503 We go back as far as possible so the branch will be considered
2504 for propagation when we get to the beginning of the block. */
2505 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2507 if (INSN_P (insn))
2509 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2510 if (branch
2511 && ((GET_CODE (branch_target) == REG
2512 && set_of (branch_target, insn) != NULL_RTX)
2513 || insn_clobbers_hbr (insn)
2514 || branch_addr - insn_addr > 600))
2516 rtx_insn *next = NEXT_INSN (insn);
2517 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2518 if (insn != BB_END (bb)
2519 && branch_addr - next_addr >= required_dist)
2521 if (dump_file)
2522 fprintf (dump_file,
2523 "hint for %i in block %i before %i\n",
2524 INSN_UID (branch), bb->index,
2525 INSN_UID (next));
2526 spu_emit_branch_hint (next, branch, branch_target,
2527 branch_addr - next_addr, blocks);
2529 branch = 0;
2532 /* JUMP_P will only be true at the end of a block. When
2533 branch is already set it means we've previously decided
2534 to propagate a hint for that branch into this block. */
2535 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2537 branch = 0;
2538 if ((branch_target = get_branch_target (insn)))
2540 branch = insn;
2541 branch_addr = insn_addr;
2542 required_dist = spu_hint_dist;
2546 if (insn == BB_HEAD (bb))
2547 break;
2550 if (branch)
2552 /* If we haven't emitted a hint for this branch yet, it might
2553 be profitable to emit it in one of the predecessor blocks,
2554 especially for loops. */
2555 rtx_insn *bbend;
2556 basic_block prev = 0, prop = 0, prev2 = 0;
2557 int loop_exit = 0, simple_loop = 0;
2558 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2560 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2561 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2562 prev = EDGE_PRED (bb, j)->src;
2563 else
2564 prev2 = EDGE_PRED (bb, j)->src;
2566 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2567 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2568 loop_exit = 1;
2569 else if (EDGE_SUCC (bb, j)->dest == bb)
2570 simple_loop = 1;
2572 /* If this branch is a loop exit then propagate to previous
2573 fallthru block. This catches the cases when it is a simple
2574 loop or when there is an initial branch into the loop. */
2575 if (prev && (loop_exit || simple_loop)
2576 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2577 prop = prev;
2579 /* If there is only one adjacent predecessor, propagate to it, but
2580 don't propagate outside this loop. */
2581 else if (prev && single_pred_p (bb)
2582 && prev->loop_father == bb->loop_father)
2583 prop = prev;
2585 /* If this is the JOIN block of a simple IF-THEN then
2586 propagate the hint to the HEADER block. */
2587 else if (prev && prev2
2588 && EDGE_COUNT (bb->preds) == 2
2589 && EDGE_COUNT (prev->preds) == 1
2590 && EDGE_PRED (prev, 0)->src == prev2
2591 && prev2->loop_father == bb->loop_father
2592 && GET_CODE (branch_target) != REG)
2593 prop = prev;
2595 /* Don't propagate when:
2596 - this is a simple loop and the hint would be too far
2597 - this is not a simple loop and there are 16 insns in
2598 this block already
2599 - the predecessor block ends in a branch that will be
2600 hinted
2601 - the predecessor block ends in an insn that invalidates
2602 the hint */
2603 if (prop
2604 && prop->index >= 0
2605 && (bbend = BB_END (prop))
2606 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2607 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2608 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2610 if (dump_file)
2611 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2612 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2613 bb->index, prop->index, bb_loop_depth (bb),
2614 INSN_UID (branch), loop_exit, simple_loop,
2615 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2617 spu_bb_info[prop->index].prop_jump = branch;
2618 spu_bb_info[prop->index].bb_index = i;
2620 else if (branch_addr - next_addr >= required_dist)
2622 if (dump_file)
2623 fprintf (dump_file, "hint for %i in block %i before %i\n",
2624 INSN_UID (branch), bb->index,
2625 INSN_UID (NEXT_INSN (insn)));
2626 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2627 branch_addr - next_addr, blocks);
2629 branch = 0;
2632 free (spu_bb_info);
2634 if (!bitmap_empty_p (blocks))
2635 find_many_sub_basic_blocks (blocks);
2637 /* We have to schedule to make sure alignment is ok. */
2638 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2640 /* The hints need to be scheduled, so call it again. */
2641 schedule_insns ();
2642 df_finish_pass (true);
2644 insert_hbrp ();
2646 pad_bb ();
2648 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2649 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2651 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2652 between its branch label and the branch. We don't move the
2653 label because GCC expects it at the beginning of the block. */
2654 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2655 rtx label_ref = XVECEXP (unspec, 0, 0);
2656 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2657 rtx_insn *branch;
2658 int offset = 0;
2659 for (branch = NEXT_INSN (label);
2660 !JUMP_P (branch) && !CALL_P (branch);
2661 branch = NEXT_INSN (branch))
2662 if (NONJUMP_INSN_P (branch))
2663 offset += get_attr_length (branch);
2664 if (offset > 0)
2665 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2668 spu_var_tracking ();
2670 loop_optimizer_finalize ();
2672 free_bb_for_insn ();
2674 in_spu_reorg = 0;
2678 /* Insn scheduling routines, primarily for dual issue. */
2679 static int
2680 spu_sched_issue_rate (void)
2682 return 2;
2685 static int
2686 uses_ls_unit(rtx_insn *insn)
2688 rtx set = single_set (insn);
2689 if (set != 0
2690 && (GET_CODE (SET_DEST (set)) == MEM
2691 || GET_CODE (SET_SRC (set)) == MEM))
2692 return 1;
2693 return 0;
2696 static int
2697 get_pipe (rtx_insn *insn)
2699 enum attr_type t;
2700 /* Handle inline asm */
2701 if (INSN_CODE (insn) == -1)
2702 return -1;
2703 t = get_attr_type (insn);
2704 switch (t)
2706 case TYPE_CONVERT:
2707 return -2;
2708 case TYPE_MULTI0:
2709 return -1;
2711 case TYPE_FX2:
2712 case TYPE_FX3:
2713 case TYPE_SPR:
2714 case TYPE_NOP:
2715 case TYPE_FXB:
2716 case TYPE_FPD:
2717 case TYPE_FP6:
2718 case TYPE_FP7:
2719 return 0;
2721 case TYPE_LNOP:
2722 case TYPE_SHUF:
2723 case TYPE_LOAD:
2724 case TYPE_STORE:
2725 case TYPE_BR:
2726 case TYPE_MULTI1:
2727 case TYPE_HBR:
2728 case TYPE_IPREFETCH:
2729 return 1;
2730 default:
2731 abort ();
2736 /* haifa-sched.c has a static variable that keeps track of the current
2737 cycle. It is passed to spu_sched_reorder, and we record it here for
2738 use by spu_sched_variable_issue. It won't be accurate if the
2739 scheduler updates its clock_var between the two calls. */
2740 static int clock_var;
2742 /* This is used to keep track of insn alignment. Set to 0 at the
2743 beginning of each block and increased by the "length" attr of each
2744 insn scheduled. */
2745 static int spu_sched_length;
2747 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2748 ready list appropriately in spu_sched_reorder(). */
2749 static int pipe0_clock;
2750 static int pipe1_clock;
2752 static int prev_clock_var;
2754 static int prev_priority;
2756 /* The SPU needs to load the next ilb sometime during the execution of
2757 the previous ilb. There is a potential conflict if every cycle has a
2758 load or store. To avoid the conflict we make sure the load/store
2759 unit is free for at least one cycle during the execution of insns in
2760 the previous ilb. */
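/* Concretely (see spu_sched_reorder below): once
   spu_sched_length - spu_ls_first reaches 15 instructions' worth of
   code (4 * 15 bytes) with the load/store unit busy every cycle, an
   hbrp is emitted instead of another load/store so the next ilb fetch
   gets a slot on that unit.  */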
2761 static int spu_ls_first;
2762 static int prev_ls_clock;
2764 static void
2765 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2766 int max_ready ATTRIBUTE_UNUSED)
2768 spu_sched_length = 0;
2771 static void
2772 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2773 int max_ready ATTRIBUTE_UNUSED)
2775 if (align_labels_value > 4 || align_loops_value > 4 || align_jumps_value > 4)
2777 /* When any block might be at least 8-byte aligned, assume they
2778 will all be at least 8-byte aligned to make sure dual issue
2779 works out correctly. */
2780 spu_sched_length = 0;
2782 spu_ls_first = INT_MAX;
2783 clock_var = -1;
2784 prev_ls_clock = -1;
2785 pipe0_clock = -1;
2786 pipe1_clock = -1;
2787 prev_clock_var = -1;
2788 prev_priority = -1;
2791 static int
2792 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2793 int verbose ATTRIBUTE_UNUSED,
2794 rtx_insn *insn, int more)
2796 int len;
2797 int p;
2798 if (GET_CODE (PATTERN (insn)) == USE
2799 || GET_CODE (PATTERN (insn)) == CLOBBER
2800 || (len = get_attr_length (insn)) == 0)
2801 return more;
2803 spu_sched_length += len;
2805 /* Reset on inline asm */
2806 if (INSN_CODE (insn) == -1)
2808 spu_ls_first = INT_MAX;
2809 pipe0_clock = -1;
2810 pipe1_clock = -1;
2811 return 0;
2813 p = get_pipe (insn);
2814 if (p == 0)
2815 pipe0_clock = clock_var;
2816 else
2817 pipe1_clock = clock_var;
2819 if (in_spu_reorg)
2821 if (clock_var - prev_ls_clock > 1
2822 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2823 spu_ls_first = INT_MAX;
2824 if (uses_ls_unit (insn))
2826 if (spu_ls_first == INT_MAX)
2827 spu_ls_first = spu_sched_length;
2828 prev_ls_clock = clock_var;
2831 /* The scheduler hasn't inserted the nop, but we will later on.
2832 Include those nops in spu_sched_length. */
2833 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2834 spu_sched_length += 4;
2835 prev_clock_var = clock_var;
2837 /* more is -1 when called from spu_sched_reorder for new insns
2838 that don't have INSN_PRIORITY */
2839 if (more >= 0)
2840 prev_priority = INSN_PRIORITY (insn);
2843 /* Always try issuing more insns. spu_sched_reorder will decide
2844 when the cycle should be advanced. */
2845 return 1;
2848 /* This function is called for both TARGET_SCHED_REORDER and
2849 TARGET_SCHED_REORDER2. */
2850 static int
2851 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2852 rtx_insn **ready, int *nreadyp, int clock)
2854 int i, nready = *nreadyp;
2855 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2856 rtx_insn *insn;
2858 clock_var = clock;
2860 if (nready <= 0 || pipe1_clock >= clock)
2861 return 0;
2863 /* Find any rtl insns that don't generate assembly insns and schedule
2864 them first. */
2865 for (i = nready - 1; i >= 0; i--)
2867 insn = ready[i];
2868 if (INSN_CODE (insn) == -1
2869 || INSN_CODE (insn) == CODE_FOR_blockage
2870 || (INSN_P (insn) && get_attr_length (insn) == 0))
2872 ready[i] = ready[nready - 1];
2873 ready[nready - 1] = insn;
2874 return 1;
2878 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2879 for (i = 0; i < nready; i++)
2880 if (INSN_CODE (ready[i]) != -1)
2882 insn = ready[i];
2883 switch (get_attr_type (insn))
2885 default:
2886 case TYPE_MULTI0:
2887 case TYPE_CONVERT:
2888 case TYPE_FX2:
2889 case TYPE_FX3:
2890 case TYPE_SPR:
2891 case TYPE_NOP:
2892 case TYPE_FXB:
2893 case TYPE_FPD:
2894 case TYPE_FP6:
2895 case TYPE_FP7:
2896 pipe_0 = i;
2897 break;
2898 case TYPE_LOAD:
2899 case TYPE_STORE:
2900 pipe_ls = i;
2901 /* FALLTHRU */
2902 case TYPE_LNOP:
2903 case TYPE_SHUF:
2904 case TYPE_BR:
2905 case TYPE_MULTI1:
2906 case TYPE_HBR:
2907 pipe_1 = i;
2908 break;
2909 case TYPE_IPREFETCH:
2910 pipe_hbrp = i;
2911 break;
2915 /* In the first scheduling phase, schedule loads and stores together
2916 to increase the chance they will get merged during postreload CSE. */
2917 if (!reload_completed && pipe_ls >= 0)
2919 insn = ready[pipe_ls];
2920 ready[pipe_ls] = ready[nready - 1];
2921 ready[nready - 1] = insn;
2922 return 1;
2925 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2926 if (pipe_hbrp >= 0)
2927 pipe_1 = pipe_hbrp;
2929 /* When we have loads/stores in every cycle of the last 15 insns and
2930 we are about to schedule another load/store, emit an hbrp insn
2931 instead. */
2932 if (in_spu_reorg
2933 && spu_sched_length - spu_ls_first >= 4 * 15
2934 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2936 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2937 recog_memoized (insn);
2938 if (pipe0_clock < clock)
2939 PUT_MODE (insn, TImode);
2940 spu_sched_variable_issue (file, verbose, insn, -1);
2941 return 0;
2944 /* In general, we want to emit nops to increase dual issue, but dual
2945 issue isn't faster when one of the insns could be scheduled later
2946 without affecting the critical path. We look at INSN_PRIORITY to
2947 make a good guess, but it isn't perfect, so -mdual-nops=n can be
2948 used to affect it. */
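/* Roughly, the heuristic below reads as: with -mdual-nops=0 never pad
   just to dual issue; with -mdual-nops=n keep filling the current
   pair unless the candidate's INSN_PRIORITY is more than n below that
   of the insn just issued; values of 10 or more skip the heuristic
   altogether.  */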
2949 if (in_spu_reorg && spu_dual_nops < 10)
2951 /* When we are at an even address and we are not issuing nops to
2952 improve scheduling then we need to advance the cycle. */
2953 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2954 && (spu_dual_nops == 0
2955 || (pipe_1 != -1
2956 && prev_priority >
2957 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2958 return 0;
2960 /* When at an odd address, schedule the highest priority insn
2961 without considering pipeline. */
2962 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2963 && (spu_dual_nops == 0
2964 || (prev_priority >
2965 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2966 return 1;
2970 /* We haven't issued a pipe0 insn yet this cycle; if there is a
2971 pipe0 insn in the ready list, schedule it. */
2972 if (pipe0_clock < clock && pipe_0 >= 0)
2973 schedule_i = pipe_0;
2975 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2976 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2977 else
2978 schedule_i = pipe_1;
2980 if (schedule_i > -1)
2982 insn = ready[schedule_i];
2983 ready[schedule_i] = ready[nready - 1];
2984 ready[nready - 1] = insn;
2985 return 1;
2987 return 0;
2990 /* INSN is dependent on DEP_INSN. */
2991 static int
2992 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2993 int cost, unsigned int)
2995 rtx set;
2997 /* The blockage pattern is used to prevent instructions from being
2998 moved across it and has no cost. */
2999 if (INSN_CODE (insn) == CODE_FOR_blockage
3000 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3001 return 0;
3003 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3004 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3005 return 0;
3007 /* Make sure hbrps are spread out. */
3008 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3009 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3010 return 8;
3012 /* Make sure hints and hbrps are 2 cycles apart. */
3013 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3014 || INSN_CODE (insn) == CODE_FOR_hbr)
3015 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3016 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3017 return 2;
3019 /* An hbrp has no real dependency on other insns. */
3020 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3021 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3022 return 0;
3024 /* Assuming that it is unlikely an argument register will be used in
3025 the first cycle of the called function, we reduce the cost for
3026 slightly better scheduling of dep_insn. When not hinted, the
3027 mispredicted branch would hide the cost as well. */
3028 if (CALL_P (insn))
3030 rtx target = get_branch_target (insn);
3031 if (GET_CODE (target) != REG || !set_of (target, insn))
3032 return cost - 2;
3033 return cost;
3036 /* And when returning from a function, let's assume the return values
3037 are completed sooner too. */
3038 if (CALL_P (dep_insn))
3039 return cost - 2;
3041 /* Make sure an instruction that loads from the back chain is scheduled
3042 away from the return instruction so a hint is more likely to get
3043 issued. */
3044 if (INSN_CODE (insn) == CODE_FOR__return
3045 && (set = single_set (dep_insn))
3046 && GET_CODE (SET_DEST (set)) == REG
3047 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3048 return 20;
3050 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3051 scheduler makes every insn in a block anti-dependent on the final
3052 jump_insn. We adjust here so higher cost insns will get scheduled
3053 earlier. */
3054 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3055 return insn_sched_cost (dep_insn) - 3;
3057 return cost;
3060 /* Create a CONST_DOUBLE from a string. */
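/* A small usage sketch: spu_float_const ("0.5", SFmode) returns the
   CONST_DOUBLE rtx for 0.5f, via REAL_VALUE_ATOF and
   const_double_from_real_value below.  */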
3062 spu_float_const (const char *string, machine_mode mode)
3064 REAL_VALUE_TYPE value;
3065 value = REAL_VALUE_ATOF (string, mode);
3066 return const_double_from_real_value (value, mode);
3070 spu_constant_address_p (rtx x)
3072 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3073 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3074 || GET_CODE (x) == HIGH);
3077 static enum spu_immediate
3078 which_immediate_load (HOST_WIDE_INT val)
3080 gcc_assert (val == trunc_int_for_mode (val, SImode));
3082 if (val >= -0x8000 && val <= 0x7fff)
3083 return SPU_IL;
3084 if (val >= 0 && val <= 0x3ffff)
3085 return SPU_ILA;
3086 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3087 return SPU_ILH;
3088 if ((val & 0xffff) == 0)
3089 return SPU_ILHU;
3091 return SPU_NONE;
3094 /* Return true when OP can be loaded by one of the il instructions, or
3095 when the epilogue has not yet been generated and OP can be loaded using ilhu and iohl. */
3097 immediate_load_p (rtx op, machine_mode mode)
3099 if (CONSTANT_P (op))
3101 enum immediate_class c = classify_immediate (op, mode);
3102 return c == IC_IL1 || c == IC_IL1s
3103 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3105 return 0;
3108 /* Return true if the first SIZE bytes of arr form a constant that can be
3109 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3110 represent the size and offset of the instruction to use. */
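/* For illustration, the pattern cwd generates for inserting a word at
   byte offset 4 looks roughly like

     { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,0x19, ... 0x1f }

   i.e. arr[i] == i + 16 everywhere except a 4-byte run counting up
   from 0; cpat_info accepts it with *prun == 4 and *pstart == 4.  */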
3111 static int
3112 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3114 int cpat, run, i, start;
3115 cpat = 1;
3116 run = 0;
3117 start = -1;
3118 for (i = 0; i < size && cpat; i++)
3119 if (arr[i] != i+16)
3121 if (!run)
3123 start = i;
3124 if (arr[i] == 3)
3125 run = 1;
3126 else if (arr[i] == 2 && arr[i+1] == 3)
3127 run = 2;
3128 else if (arr[i] == 0)
3130 while (arr[i+run] == run && i+run < 16)
3131 run++;
3132 if (run != 4 && run != 8)
3133 cpat = 0;
3135 else
3136 cpat = 0;
3137 if ((i & (run-1)) != 0)
3138 cpat = 0;
3139 i += run;
3141 else
3142 cpat = 0;
3144 if (cpat && (run || size < 16))
3146 if (run == 0)
3147 run = 1;
3148 if (prun)
3149 *prun = run;
3150 if (pstart)
3151 *pstart = start == -1 ? 16-run : start;
3152 return 1;
3154 return 0;
3157 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3158 it into a register. MODE is only valid when OP is a CONST_INT. */
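/* A few worked examples of the classification below, as a sketch:
   (const_int 0x1234) repeats the same word in all four 4-byte slots
   and fits il, so it is IC_IL1; (const_int 0x12345678) also repeats
   per word but needs an ilhu/iohl pair, so it is IC_IL2; a V16QI
   constant whose only nonzero byte is 0xff passes the fsmbi test and
   is IC_FSMBI.  */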
3159 static enum immediate_class
3160 classify_immediate (rtx op, machine_mode mode)
3162 HOST_WIDE_INT val;
3163 unsigned char arr[16];
3164 int i, j, repeated, fsmbi, repeat;
3166 gcc_assert (CONSTANT_P (op));
3168 if (GET_MODE (op) != VOIDmode)
3169 mode = GET_MODE (op);
3171 /* A V4SI const_vector with all identical symbols is ok. */
3172 if (!flag_pic
3173 && mode == V4SImode
3174 && GET_CODE (op) == CONST_VECTOR
3175 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3176 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3177 op = unwrap_const_vec_duplicate (op);
3179 switch (GET_CODE (op))
3181 case SYMBOL_REF:
3182 case LABEL_REF:
3183 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3185 case CONST:
3186 /* We can never know if the resulting address fits in 18 bits and can be
3187 loaded with ila. For now, assume the address will not overflow if
3188 the displacement is "small" (fits 'K' constraint). */
3189 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3191 rtx sym = XEXP (XEXP (op, 0), 0);
3192 rtx cst = XEXP (XEXP (op, 0), 1);
3194 if (GET_CODE (sym) == SYMBOL_REF
3195 && GET_CODE (cst) == CONST_INT
3196 && satisfies_constraint_K (cst))
3197 return IC_IL1s;
3199 return IC_IL2s;
3201 case HIGH:
3202 return IC_IL1s;
3204 case CONST_VECTOR:
3205 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3206 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3207 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3208 return IC_POOL;
3209 /* Fall through. */
3211 case CONST_INT:
3212 case CONST_DOUBLE:
3213 constant_to_array (mode, op, arr);
3215 /* Check that each 4-byte slot is identical. */
3216 repeated = 1;
3217 for (i = 4; i < 16; i += 4)
3218 for (j = 0; j < 4; j++)
3219 if (arr[j] != arr[i + j])
3220 repeated = 0;
3222 if (repeated)
3224 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3225 val = trunc_int_for_mode (val, SImode);
3227 if (which_immediate_load (val) != SPU_NONE)
3228 return IC_IL1;
3231 /* Any mode of 2 bytes or smaller can be loaded with an il
3232 instruction. */
3233 gcc_assert (GET_MODE_SIZE (mode) > 2);
3235 fsmbi = 1;
3236 repeat = 0;
3237 for (i = 0; i < 16 && fsmbi; i++)
3238 if (arr[i] != 0 && repeat == 0)
3239 repeat = arr[i];
3240 else if (arr[i] != 0 && arr[i] != repeat)
3241 fsmbi = 0;
3242 if (fsmbi)
3243 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3245 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3246 return IC_CPAT;
3248 if (repeated)
3249 return IC_IL2;
3251 return IC_POOL;
3252 default:
3253 break;
3255 gcc_unreachable ();
3258 static enum spu_immediate
3259 which_logical_immediate (HOST_WIDE_INT val)
3261 gcc_assert (val == trunc_int_for_mode (val, SImode));
3263 if (val >= -0x200 && val <= 0x1ff)
3264 return SPU_ORI;
3265 if (val >= 0 && val <= 0xffff)
3266 return SPU_IOHL;
3267 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3269 val = trunc_int_for_mode (val, HImode);
3270 if (val >= -0x200 && val <= 0x1ff)
3271 return SPU_ORHI;
3272 if ((val & 0xff) == ((val >> 8) & 0xff))
3274 val = trunc_int_for_mode (val, QImode);
3275 if (val >= -0x200 && val <= 0x1ff)
3276 return SPU_ORBI;
3279 return SPU_NONE;
3282 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3283 CONST_DOUBLEs. */
3284 static int
3285 const_vector_immediate_p (rtx x)
3287 int i;
3288 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3289 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3290 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3291 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3292 return 0;
3293 return 1;
3297 logical_immediate_p (rtx op, machine_mode mode)
3299 HOST_WIDE_INT val;
3300 unsigned char arr[16];
3301 int i, j;
3303 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3304 || GET_CODE (op) == CONST_VECTOR);
3306 if (GET_CODE (op) == CONST_VECTOR
3307 && !const_vector_immediate_p (op))
3308 return 0;
3310 if (GET_MODE (op) != VOIDmode)
3311 mode = GET_MODE (op);
3313 constant_to_array (mode, op, arr);
3315 /* Check that bytes are repeated. */
3316 for (i = 4; i < 16; i += 4)
3317 for (j = 0; j < 4; j++)
3318 if (arr[j] != arr[i + j])
3319 return 0;
3321 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3322 val = trunc_int_for_mode (val, SImode);
3324 i = which_logical_immediate (val);
3325 return i != SPU_NONE && i != SPU_IOHL;
3329 iohl_immediate_p (rtx op, machine_mode mode)
3331 HOST_WIDE_INT val;
3332 unsigned char arr[16];
3333 int i, j;
3335 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3336 || GET_CODE (op) == CONST_VECTOR);
3338 if (GET_CODE (op) == CONST_VECTOR
3339 && !const_vector_immediate_p (op))
3340 return 0;
3342 if (GET_MODE (op) != VOIDmode)
3343 mode = GET_MODE (op);
3345 constant_to_array (mode, op, arr);
3347 /* Check that bytes are repeated. */
3348 for (i = 4; i < 16; i += 4)
3349 for (j = 0; j < 4; j++)
3350 if (arr[j] != arr[i + j])
3351 return 0;
3353 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3354 val = trunc_int_for_mode (val, SImode);
3356 return val >= 0 && val <= 0xffff;
3360 arith_immediate_p (rtx op, machine_mode mode,
3361 HOST_WIDE_INT low, HOST_WIDE_INT high)
3363 HOST_WIDE_INT val;
3364 unsigned char arr[16];
3365 int bytes, i, j;
3367 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3368 || GET_CODE (op) == CONST_VECTOR);
3370 if (GET_CODE (op) == CONST_VECTOR
3371 && !const_vector_immediate_p (op))
3372 return 0;
3374 if (GET_MODE (op) != VOIDmode)
3375 mode = GET_MODE (op);
3377 constant_to_array (mode, op, arr);
3379 bytes = GET_MODE_UNIT_SIZE (mode);
3380 mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
3382 /* Check that bytes are repeated. */
3383 for (i = bytes; i < 16; i += bytes)
3384 for (j = 0; j < bytes; j++)
3385 if (arr[j] != arr[i + j])
3386 return 0;
3388 val = arr[0];
3389 for (j = 1; j < bytes; j++)
3390 val = (val << 8) | arr[j];
3392 val = trunc_int_for_mode (val, mode);
3394 return val >= low && val <= high;
3397 /* TRUE when op is an immediate and an exact power of 2, and given that
3398 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3399 all entries must be the same. */
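/* For example (SFmode only, per the assert below): 8.0f is encoded as
   0x41000000, so val > 0, the mantissa bits 0x007fffff are clear, and
   exp = (0x41000000 >> 23) - 127 = 3, so the constant is accepted
   whenever low <= 3 <= high.  */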
3400 bool
3401 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3403 machine_mode int_mode;
3404 HOST_WIDE_INT val;
3405 unsigned char arr[16];
3406 int bytes, i, j;
3408 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3409 || GET_CODE (op) == CONST_VECTOR);
3411 if (GET_CODE (op) == CONST_VECTOR
3412 && !const_vector_immediate_p (op))
3413 return 0;
3415 if (GET_MODE (op) != VOIDmode)
3416 mode = GET_MODE (op);
3418 constant_to_array (mode, op, arr);
3420 mode = GET_MODE_INNER (mode);
3422 bytes = GET_MODE_SIZE (mode);
3423 int_mode = int_mode_for_mode (mode).require ();
3425 /* Check that bytes are repeated. */
3426 for (i = bytes; i < 16; i += bytes)
3427 for (j = 0; j < bytes; j++)
3428 if (arr[j] != arr[i + j])
3429 return 0;
3431 val = arr[0];
3432 for (j = 1; j < bytes; j++)
3433 val = (val << 8) | arr[j];
3435 val = trunc_int_for_mode (val, int_mode);
3437 /* Currently, we only handle SFmode */
3438 gcc_assert (mode == SFmode);
3439 if (mode == SFmode)
3441 int exp = (val >> 23) - 127;
3442 return val > 0 && (val & 0x007fffff) == 0
3443 && exp >= low && exp <= high;
3445 return FALSE;
3448 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3450 static bool
3451 ea_symbol_ref_p (const_rtx x)
3453 tree decl;
3455 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3457 rtx plus = XEXP (x, 0);
3458 rtx op0 = XEXP (plus, 0);
3459 rtx op1 = XEXP (plus, 1);
3460 if (GET_CODE (op1) == CONST_INT)
3461 x = op0;
3464 return (GET_CODE (x) == SYMBOL_REF
3465 && (decl = SYMBOL_REF_DECL (x)) != 0
3466 && TREE_CODE (decl) == VAR_DECL
3467 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3470 /* We accept:
3471 - any 32-bit constant (SImode, SFmode)
3472 - any constant that can be generated with fsmbi (any mode)
3473 - a 64-bit constant where the high and low bits are identical
3474 (DImode, DFmode)
3475 - a 128-bit constant where the four 32-bit words match. */
3476 bool
3477 spu_legitimate_constant_p (machine_mode mode, rtx x)
3479 subrtx_iterator::array_type array;
3480 if (GET_CODE (x) == HIGH)
3481 x = XEXP (x, 0);
3483 /* Reject any __ea qualified reference. These can't appear in
3484 instructions but must be forced to the constant pool. */
3485 FOR_EACH_SUBRTX (iter, array, x, ALL)
3486 if (ea_symbol_ref_p (*iter))
3487 return 0;
3489 /* V4SI with all identical symbols is valid. */
3490 if (!flag_pic
3491 && mode == V4SImode
3492 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3493 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3494 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3495 return const_vec_duplicate_p (x);
3497 if (GET_CODE (x) == CONST_VECTOR
3498 && !const_vector_immediate_p (x))
3499 return 0;
3500 return 1;
3503 /* Valid addresses are:
3504 - symbol_ref, label_ref, const
3505 - reg
3506 - reg + const_int, where const_int is 16 byte aligned
3507 - reg + reg, alignment doesn't matter
3508 The alignment matters in the reg+const case because lqd and stqd
3509 ignore the 4 least significant bits of the const. We only care about
3510 16 byte modes because the expand phase will change all smaller MEM
3511 references to TImode. */
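/* For instance, with a 16-byte mode, (plus (reg) (const_int 32)) is
   accepted below because 32 is a multiple of 16, while
   (plus (reg) (const_int 4)) is normally rejected, since lqd/stqd
   would silently drop the low 4 bits (virtual and frame pointers get
   a pass until elimination).  (plus (reg) (reg)) is fine either way.  */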
3512 static bool
3513 spu_legitimate_address_p (machine_mode mode,
3514 rtx x, bool reg_ok_strict)
3516 int aligned = GET_MODE_SIZE (mode) >= 16;
3517 if (aligned
3518 && GET_CODE (x) == AND
3519 && GET_CODE (XEXP (x, 1)) == CONST_INT
3520 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3521 x = XEXP (x, 0);
3522 switch (GET_CODE (x))
3524 case LABEL_REF:
3525 return !TARGET_LARGE_MEM;
3527 case SYMBOL_REF:
3528 case CONST:
3529 /* Keep __ea references until reload so that spu_expand_mov can see them
3530 in MEMs. */
3531 if (ea_symbol_ref_p (x))
3532 return !reload_in_progress && !reload_completed;
3533 return !TARGET_LARGE_MEM;
3535 case CONST_INT:
3536 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3538 case SUBREG:
3539 x = XEXP (x, 0);
3540 if (!REG_P (x))
3541 return 0;
3542 /* FALLTHRU */
3544 case REG:
3545 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3547 case PLUS:
3548 case LO_SUM:
3550 rtx op0 = XEXP (x, 0);
3551 rtx op1 = XEXP (x, 1);
3552 if (GET_CODE (op0) == SUBREG)
3553 op0 = XEXP (op0, 0);
3554 if (GET_CODE (op1) == SUBREG)
3555 op1 = XEXP (op1, 0);
3556 if (GET_CODE (op0) == REG
3557 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3558 && GET_CODE (op1) == CONST_INT
3559 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3560 /* If virtual registers are involved, the displacement will
3561 change later on anyway, so checking would be premature.
3562 Reload will make sure the final displacement after
3563 register elimination is OK. */
3564 || op0 == arg_pointer_rtx
3565 || op0 == frame_pointer_rtx
3566 || op0 == virtual_stack_vars_rtx)
3567 && (!aligned || (INTVAL (op1) & 15) == 0))
3568 return TRUE;
3569 if (GET_CODE (op0) == REG
3570 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3571 && GET_CODE (op1) == REG
3572 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3573 return TRUE;
3575 break;
3577 default:
3578 break;
3580 return FALSE;
3583 /* Like spu_legitimate_address_p, except with named addresses. */
3584 static bool
3585 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3586 bool reg_ok_strict, addr_space_t as)
3588 if (as == ADDR_SPACE_EA)
3589 return (REG_P (x) && (GET_MODE (x) == EAmode));
3591 else if (as != ADDR_SPACE_GENERIC)
3592 gcc_unreachable ();
3594 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3597 /* When the address is reg + const_int, force the const_int into a
3598 register. */
3599 static rtx
3600 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3601 machine_mode mode ATTRIBUTE_UNUSED)
3603 rtx op0, op1;
3604 /* Make sure both operands are registers. */
3605 if (GET_CODE (x) == PLUS)
3607 op0 = XEXP (x, 0);
3608 op1 = XEXP (x, 1);
3609 if (ALIGNED_SYMBOL_REF_P (op0))
3611 op0 = force_reg (Pmode, op0);
3612 mark_reg_pointer (op0, 128);
3614 else if (GET_CODE (op0) != REG)
3615 op0 = force_reg (Pmode, op0);
3616 if (ALIGNED_SYMBOL_REF_P (op1))
3618 op1 = force_reg (Pmode, op1);
3619 mark_reg_pointer (op1, 128);
3621 else if (GET_CODE (op1) != REG)
3622 op1 = force_reg (Pmode, op1);
3623 x = gen_rtx_PLUS (Pmode, op0, op1);
3625 return x;
3628 /* Like spu_legitimize_address, except with named address support. */
3629 static rtx
3630 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3631 addr_space_t as)
3633 if (as != ADDR_SPACE_GENERIC)
3634 return x;
3636 return spu_legitimize_address (x, oldx, mode);
3639 /* Reload reg + const_int for out-of-range displacements. */
3641 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3642 int opnum, int type)
3644 bool removed_and = false;
3646 if (GET_CODE (ad) == AND
3647 && CONST_INT_P (XEXP (ad, 1))
3648 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3650 ad = XEXP (ad, 0);
3651 removed_and = true;
3654 if (GET_CODE (ad) == PLUS
3655 && REG_P (XEXP (ad, 0))
3656 && CONST_INT_P (XEXP (ad, 1))
3657 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3658 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3660 /* Unshare the sum. */
3661 ad = copy_rtx (ad);
3663 /* Reload the displacement. */
3664 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3665 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3666 opnum, (enum reload_type) type);
3668 /* Add back AND for alignment if we stripped it. */
3669 if (removed_and)
3670 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3672 return ad;
3675 return NULL_RTX;
3678 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3679 struct attribute_spec.handler. */
3680 static tree
3681 spu_handle_fndecl_attribute (tree * node,
3682 tree name,
3683 tree args ATTRIBUTE_UNUSED,
3684 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3686 if (TREE_CODE (*node) != FUNCTION_DECL)
3688 warning (0, "%qE attribute only applies to functions",
3689 name);
3690 *no_add_attrs = true;
3693 return NULL_TREE;
3696 /* Handle the "vector" attribute. */
3697 static tree
3698 spu_handle_vector_attribute (tree * node, tree name,
3699 tree args ATTRIBUTE_UNUSED,
3700 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3702 tree type = *node, result = NULL_TREE;
3703 machine_mode mode;
3704 int unsigned_p;
3706 while (POINTER_TYPE_P (type)
3707 || TREE_CODE (type) == FUNCTION_TYPE
3708 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3709 type = TREE_TYPE (type);
3711 mode = TYPE_MODE (type);
3713 unsigned_p = TYPE_UNSIGNED (type);
3714 switch (mode)
3716 case E_DImode:
3717 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3718 break;
3719 case E_SImode:
3720 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3721 break;
3722 case E_HImode:
3723 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3724 break;
3725 case E_QImode:
3726 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3727 break;
3728 case E_SFmode:
3729 result = V4SF_type_node;
3730 break;
3731 case E_DFmode:
3732 result = V2DF_type_node;
3733 break;
3734 default:
3735 break;
3738 /* Propagate qualifiers attached to the element type
3739 onto the vector type. */
3740 if (result && result != type && TYPE_QUALS (type))
3741 result = build_qualified_type (result, TYPE_QUALS (type));
3743 *no_add_attrs = true; /* No need to hang on to the attribute. */
3745 if (!result)
3746 warning (0, "%qE attribute ignored", name);
3747 else
3748 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3750 return NULL_TREE;
3753 /* Return nonzero if FUNC is a naked function. */
3754 static int
3755 spu_naked_function_p (tree func)
3757 tree a;
3759 if (TREE_CODE (func) != FUNCTION_DECL)
3760 abort ();
3762 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3763 return a != NULL_TREE;
3767 spu_initial_elimination_offset (int from, int to)
3769 int saved_regs_size = spu_saved_regs_size ();
3770 int sp_offset = 0;
3771 if (!crtl->is_leaf || crtl->outgoing_args_size
3772 || get_frame_size () || saved_regs_size)
3773 sp_offset = STACK_POINTER_OFFSET;
3774 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3775 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3776 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3777 return get_frame_size ();
3778 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3779 return sp_offset + crtl->outgoing_args_size
3780 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3781 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3782 return get_frame_size () + saved_regs_size + sp_offset;
3783 else
3784 gcc_unreachable ();
3788 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3790 machine_mode mode = TYPE_MODE (type);
3791 int byte_size = ((mode == BLKmode)
3792 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3794 /* Make sure small structs are left justified in a register. */
3795 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3796 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3798 machine_mode smode;
3799 rtvec v;
3800 int i;
3801 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3802 int n = byte_size / UNITS_PER_WORD;
3803 v = rtvec_alloc (nregs);
3804 for (i = 0; i < n; i++)
3806 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3807 gen_rtx_REG (TImode,
3808 FIRST_RETURN_REGNUM
3809 + i),
3810 GEN_INT (UNITS_PER_WORD * i));
3811 byte_size -= UNITS_PER_WORD;
3814 if (n < nregs)
3816 if (byte_size < 4)
3817 byte_size = 4;
3818 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3819 RTVEC_ELT (v, n) =
3820 gen_rtx_EXPR_LIST (VOIDmode,
3821 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3822 GEN_INT (UNITS_PER_WORD * n));
3824 return gen_rtx_PARALLEL (mode, v);
3826 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3829 static rtx
3830 spu_function_arg (cumulative_args_t cum_v,
3831 machine_mode mode,
3832 const_tree type, bool named ATTRIBUTE_UNUSED)
3834 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3835 int byte_size;
3837 if (*cum >= MAX_REGISTER_ARGS)
3838 return 0;
3840 byte_size = ((mode == BLKmode)
3841 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3843 /* The ABI does not allow parameters to be passed partially in
3844 a register and partially on the stack. */
3845 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3846 return 0;
3848 /* Make sure small structs are left justified in a register. */
3849 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3850 && byte_size < UNITS_PER_WORD && byte_size > 0)
3852 machine_mode smode;
3853 rtx gr_reg;
3854 if (byte_size < 4)
3855 byte_size = 4;
3856 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3857 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3858 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3859 const0_rtx);
3860 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3862 else
3863 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3866 static void
3867 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3868 const_tree type, bool named ATTRIBUTE_UNUSED)
3870 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3872 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3874 : mode == BLKmode
3875 ? ((int_size_in_bytes (type) + 15) / 16)
3876 : mode == VOIDmode
3878 : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
3881 /* Implement TARGET_FUNCTION_ARG_OFFSET. The SPU ABI wants 32/64-bit
3882 types at offset 0 in the quad-word on the stack. 8/16-bit types
3883 should be at offsets 3/2 respectively. */
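/* So a char argument (1 byte) is placed at byte offset 4 - 1 = 3 of
   its quad-word slot, a short at 4 - 2 = 2, and anything 4 bytes or
   wider (or non-integral) stays at offset 0.  */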
3885 static HOST_WIDE_INT
3886 spu_function_arg_offset (machine_mode mode, const_tree type)
3888 if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3889 return 4 - GET_MODE_SIZE (mode);
3890 return 0;
3893 /* Implement TARGET_FUNCTION_ARG_PADDING. */
3895 static pad_direction
3896 spu_function_arg_padding (machine_mode, const_tree)
3898 return PAD_UPWARD;
3901 /* Variable sized types are passed by reference. */
3902 static bool
3903 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3904 machine_mode mode ATTRIBUTE_UNUSED,
3905 const_tree type, bool named ATTRIBUTE_UNUSED)
3907 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3911 /* Var args. */
3913 /* Create and return the va_list datatype.
3915 On SPU, va_list is an array type equivalent to
3917 typedef struct __va_list_tag
3919 void *__args __attribute__((__aligned(16)));
3920 void *__skip __attribute__((__aligned(16)));
3922 } va_list[1];
3924 where __args points to the arg that will be returned by the next
3925 va_arg(), and __skip points to the previous stack frame such that
3926 when __args == __skip we should advance __args by 32 bytes. */
3927 static tree
3928 spu_build_builtin_va_list (void)
3930 tree f_args, f_skip, record, type_decl;
3931 bool owp;
3933 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3935 type_decl =
3936 build_decl (BUILTINS_LOCATION,
3937 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3939 f_args = build_decl (BUILTINS_LOCATION,
3940 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3941 f_skip = build_decl (BUILTINS_LOCATION,
3942 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3944 DECL_FIELD_CONTEXT (f_args) = record;
3945 SET_DECL_ALIGN (f_args, 128);
3946 DECL_USER_ALIGN (f_args) = 1;
3948 DECL_FIELD_CONTEXT (f_skip) = record;
3949 SET_DECL_ALIGN (f_skip, 128);
3950 DECL_USER_ALIGN (f_skip) = 1;
3952 TYPE_STUB_DECL (record) = type_decl;
3953 TYPE_NAME (record) = type_decl;
3954 TYPE_FIELDS (record) = f_args;
3955 DECL_CHAIN (f_args) = f_skip;
3957 /* We know this is being padded and we want it that way. It is an internal
3958 type so hide the warnings from the user. */
3959 owp = warn_padded;
3960 warn_padded = false;
3962 layout_type (record);
3964 warn_padded = owp;
3966 /* The correct type is an array type of one element. */
3967 return build_array_type (record, build_index_type (size_zero_node));
3970 /* Implement va_start by filling the va_list structure VALIST.
3971 NEXTARG points to the first anonymous stack argument.
3973 The following global variables are used to initialize
3974 the va_list structure:
3976 crtl->args.info;
3977 the CUMULATIVE_ARGS for this function
3979 crtl->args.arg_offset_rtx:
3980 holds the offset of the first anonymous stack argument
3981 (relative to the virtual arg pointer). */
3983 static void
3984 spu_va_start (tree valist, rtx nextarg)
3986 tree f_args, f_skip;
3987 tree args, skip, t;
3989 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3990 f_skip = DECL_CHAIN (f_args);
3992 valist = build_simple_mem_ref (valist);
3993 args =
3994 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3995 skip =
3996 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3998 /* Find the __args area. */
3999 t = make_tree (TREE_TYPE (args), nextarg);
4000 if (crtl->args.pretend_args_size > 0)
4001 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
4002 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4003 TREE_SIDE_EFFECTS (t) = 1;
4004 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4006 /* Find the __skip area. */
4007 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4008 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4009 - STACK_POINTER_OFFSET));
4010 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4011 TREE_SIDE_EFFECTS (t) = 1;
4012 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4015 /* Gimplify va_arg by updating the va_list structure
4016 VALIST as required to retrieve an argument of type
4017 TYPE, and returning that argument.
4019 ret = va_arg(VALIST, TYPE);
4021 generates code equivalent to:
4023 paddedsize = (sizeof(TYPE) + 15) & -16;
4024 if (VALIST.__args + paddedsize > VALIST.__skip
4025 && VALIST.__args <= VALIST.__skip)
4026 addr = VALIST.__skip + 32;
4027 else
4028 addr = VALIST.__args;
4029 VALIST.__args = addr + paddedsize;
4030 ret = *(TYPE *)addr; */
4032 static tree
4033 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4034 gimple_seq * post_p ATTRIBUTE_UNUSED)
4036 tree f_args, f_skip;
4037 tree args, skip;
4038 HOST_WIDE_INT size, rsize;
4039 tree addr, tmp;
4040 bool pass_by_reference_p;
4042 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4043 f_skip = DECL_CHAIN (f_args);
4045 args =
4046 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4047 skip =
4048 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4050 addr = create_tmp_var (ptr_type_node, "va_arg");
4052 /* if an object is dynamically sized, a pointer to it is passed
4053 instead of the object itself. */
4054 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4055 false);
4056 if (pass_by_reference_p)
4057 type = build_pointer_type (type);
4058 size = int_size_in_bytes (type);
4059 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4061 /* build conditional expression to calculate addr. The expression
4062 will be gimplified later. */
4063 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4064 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4065 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4066 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4067 unshare_expr (skip)));
4069 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4070 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4071 unshare_expr (args));
4073 gimplify_assign (addr, tmp, pre_p);
4075 /* update VALIST.__args */
4076 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4077 gimplify_assign (unshare_expr (args), tmp, pre_p);
4079 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4080 addr);
4082 if (pass_by_reference_p)
4083 addr = build_va_arg_indirect_ref (addr);
4085 return build_va_arg_indirect_ref (addr);
4088 /* Save parameter registers starting with the register that corresponds
4089 to the first unnamed parameter. If the first unnamed parameter is
4090 on the stack then save no registers. Set pretend_args_size to the
4091 amount of space needed to save the registers. */
4092 static void
4093 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4094 tree type, int *pretend_size, int no_rtl)
4096 if (!no_rtl)
4098 rtx tmp;
4099 int regno;
4100 int offset;
4101 int ncum = *get_cumulative_args (cum);
4103 /* cum currently points to the last named argument; we want to
4104 start at the next argument. */
4105 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4107 offset = -STACK_POINTER_OFFSET;
4108 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4110 tmp = gen_frame_mem (V4SImode,
4111 plus_constant (Pmode, virtual_incoming_args_rtx,
4112 offset));
4113 emit_move_insn (tmp,
4114 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4115 offset += 16;
4117 *pretend_size = offset + STACK_POINTER_OFFSET;
4121 static void
4122 spu_conditional_register_usage (void)
4124 if (flag_pic)
4126 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4127 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4131 /* This is called any time we inspect the alignment of a register for
4132 addresses. */
4133 static int
4134 reg_aligned_for_addr (rtx x)
4136 int regno =
4137 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4138 return REGNO_POINTER_ALIGN (regno) >= 128;
4141 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4142 into its SYMBOL_REF_FLAGS. */
4143 static void
4144 spu_encode_section_info (tree decl, rtx rtl, int first)
4146 default_encode_section_info (decl, rtl, first);
4148 /* If a variable has a forced alignment to < 16 bytes, mark it with
4149 SYMBOL_FLAG_ALIGN1. */
4150 if (TREE_CODE (decl) == VAR_DECL
4151 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4152 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4155 /* Return TRUE if we are certain the mem refers to a complete object
4156 which is both 16-byte aligned and padded to a 16-byte boundary. This
4157 would make it safe to store with a single instruction.
4158 We guarantee the alignment and padding for static objects by aligning
4159 all of them to 16 bytes. (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
4160 FIXME: We currently cannot guarantee this for objects on the stack
4161 because assign_parm_setup_stack calls assign_stack_local with the
4162 alignment of the parameter mode and in that case the alignment never
4163 gets adjusted by LOCAL_ALIGNMENT. */
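/* Hypothetical example: a store to "static int x;" appears as an SImode
   MEM of a SYMBOL_REF; since static objects are 16-byte aligned and
   padded, clobbering the rest of x's quadword only touches padding, so a
   single quadword store is safe.  */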
4164 static int
4165 store_with_one_insn_p (rtx mem)
4167 machine_mode mode = GET_MODE (mem);
4168 rtx addr = XEXP (mem, 0);
4169 if (mode == BLKmode)
4170 return 0;
4171 if (GET_MODE_SIZE (mode) >= 16)
4172 return 1;
4173 /* Only static objects. */
4174 if (GET_CODE (addr) == SYMBOL_REF)
4176 /* We use the associated declaration to make sure the access is
4177 referring to the whole object.
4178 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4179 if it is necessary. Will there be cases where one exists, and
4180 the other does not? Will there be cases where both exist, but
4181 have different types? */
4182 tree decl = MEM_EXPR (mem);
4183 if (decl
4184 && TREE_CODE (decl) == VAR_DECL
4185 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4186 return 1;
4187 decl = SYMBOL_REF_DECL (addr);
4188 if (decl
4189 && TREE_CODE (decl) == VAR_DECL
4190 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4191 return 1;
4193 return 0;
4196 /* Return 1 when the address is not valid for a simple load and store as
4197 required by the '_mov*' patterns. We could make this less strict
4198 for loads, but we prefer MEMs to look the same so they are more
4199 likely to be merged. */
4200 static int
4201 address_needs_split (rtx mem)
4203 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4204 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4205 || !(store_with_one_insn_p (mem)
4206 || mem_is_padded_component_ref (mem))))
4207 return 1;
4209 return 0;
4212 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4213 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4214 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4216 /* MEM is known to be an __ea qualified memory access. Emit a call to
4217 fetch the ppu memory to local store, and return its address in local
4218 store. */
4220 static void
4221 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4223 if (is_store)
4225 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4226 if (!cache_fetch_dirty)
4227 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4228 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4229 ea_addr, EAmode, ndirty, SImode);
4231 else
4233 if (!cache_fetch)
4234 cache_fetch = init_one_libfunc ("__cache_fetch");
4235 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4236 ea_addr, EAmode);
4240 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4241 dirty bit marking, inline.
4243 The cache control data structure is an array of
4245 struct __cache_tag_array
4246 {
4247 unsigned int tag_lo[4];
4248 unsigned int tag_hi[4];
4249 void *data_pointer[4];
4250 int reserved[4];
4251 vector unsigned short dirty_bits[4];
4252 } */
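/* A sketch of the lookup below, reading off the code rather than the
   cache runtime itself: with 128-byte cache lines, an effective address
   ea yields
     tag_index = ea & (__cache_tag_array_size - 128)  -- selects the set,
     block_off = ea & 127                             -- offset in line,
     tag       = ea & -128                            -- value compared
   against the four tag_lo/tag_hi slots; on a hit, the matching
   data_pointer slot gives the copy in local store.  */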
4254 static void
4255 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4257 rtx ea_addr_si;
4258 HOST_WIDE_INT v;
4259 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4260 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4261 rtx index_mask = gen_reg_rtx (SImode);
4262 rtx tag_arr = gen_reg_rtx (Pmode);
4263 rtx splat_mask = gen_reg_rtx (TImode);
4264 rtx splat = gen_reg_rtx (V4SImode);
4265 rtx splat_hi = NULL_RTX;
4266 rtx tag_index = gen_reg_rtx (Pmode);
4267 rtx block_off = gen_reg_rtx (SImode);
4268 rtx tag_addr = gen_reg_rtx (Pmode);
4269 rtx tag = gen_reg_rtx (V4SImode);
4270 rtx cache_tag = gen_reg_rtx (V4SImode);
4271 rtx cache_tag_hi = NULL_RTX;
4272 rtx cache_ptrs = gen_reg_rtx (TImode);
4273 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4274 rtx tag_equal = gen_reg_rtx (V4SImode);
4275 rtx tag_equal_hi = NULL_RTX;
4276 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4277 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4278 rtx eq_index = gen_reg_rtx (SImode);
4279 rtx bcomp, hit_label, hit_ref, cont_label;
4280 rtx_insn *insn;
4282 if (spu_ea_model != 32)
4284 splat_hi = gen_reg_rtx (V4SImode);
4285 cache_tag_hi = gen_reg_rtx (V4SImode);
4286 tag_equal_hi = gen_reg_rtx (V4SImode);
4289 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4290 emit_move_insn (tag_arr, tag_arr_sym);
4291 v = 0x0001020300010203LL;
4292 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4293 ea_addr_si = ea_addr;
4294 if (spu_ea_model != 32)
4295 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4297 /* tag_index = ea_addr & (tag_array_size - 128) */
4298 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4300 /* splat ea_addr to all 4 slots. */
4301 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4302 /* Similarly for high 32 bits of ea_addr. */
4303 if (spu_ea_model != 32)
4304 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4306 /* block_off = ea_addr & 127 */
4307 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4309 /* tag_addr = tag_arr + tag_index */
4310 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4312 /* Read cache tags. */
4313 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4314 if (spu_ea_model != 32)
4315 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4316 plus_constant (Pmode,
4317 tag_addr, 16)));
4319 /* tag = ea_addr & -128 */
4320 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4322 /* Read all four cache data pointers. */
4323 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4324 plus_constant (Pmode,
4325 tag_addr, 32)));
4327 /* Compare tags. */
4328 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4329 if (spu_ea_model != 32)
4331 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4332 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4335 /* At most one of the tags compares equal, so tag_equal has one
4336 32-bit slot set to all 1's, with the other slots all zero.
4337 gbb picks off the low bit from each byte in the 128-bit registers,
4338 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4339 we have a hit. */
4340 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4341 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4343 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4344 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4346 /* This allows us to rotate the corresponding cache data pointer to
4347 slot 0 (rotating by eq_index mod 16 bytes). */
4348 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4349 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
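/* Worked example, assuming a hit in way 2: tag_equal has slot 2 all 1's,
   so gbb yields tag_eq_pack_si == 0x00f0 and clz sets eq_index to 24;
   rotqby by 24 (8 mod 16 bytes) then moves data_pointer[2] into the
   preferred slot before the block offset is added below.  */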
4351 /* Add block offset to form final data address. */
4352 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4354 /* Check that we did hit. */
4355 hit_label = gen_label_rtx ();
4356 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4357 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4358 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4359 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4360 hit_ref, pc_rtx)));
4361 /* Say that this branch is very likely to happen. */
4362 add_reg_br_prob_note (insn, profile_probability::very_likely ());
4364 ea_load_store (mem, is_store, ea_addr, data_addr);
4365 cont_label = gen_label_rtx ();
4366 emit_jump_insn (gen_jump (cont_label));
4367 emit_barrier ();
4369 emit_label (hit_label);
4371 if (is_store)
4373 HOST_WIDE_INT v_hi;
4374 rtx dirty_bits = gen_reg_rtx (TImode);
4375 rtx dirty_off = gen_reg_rtx (SImode);
4376 rtx dirty_128 = gen_reg_rtx (TImode);
4377 rtx neg_block_off = gen_reg_rtx (SImode);
4379 /* Set up mask with one dirty bit per byte of the mem we are
4380 writing, starting from top bit. */
4381 v_hi = v = -1;
4382 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4383 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4385 v_hi = v;
4386 v = 0;
4388 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
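/* Worked example for a hypothetical 4-byte (SImode) store: 128 - 4 = 124,
   so the shift count is 124 & 63 == 60 and the ">= 64" branch copies the
   shifted word into v_hi; the resulting 128-bit mask has only its top
   four bits set -- one dirty bit per byte about to be written.  */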
4390 /* Form index into cache dirty_bits. eq_index is one of
4391 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4392 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4393 offset to each of the four dirty_bits elements. */
4394 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4396 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4398 /* Rotate bit mask to proper bit. */
4399 emit_insn (gen_negsi2 (neg_block_off, block_off));
4400 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4401 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4403 /* Or in the new dirty bits. */
4404 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4406 /* Store. */
4407 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4410 emit_label (cont_label);
4413 static rtx
4414 expand_ea_mem (rtx mem, bool is_store)
4416 rtx ea_addr;
4417 rtx data_addr = gen_reg_rtx (Pmode);
4418 rtx new_mem;
4420 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4421 if (optimize_size || optimize == 0)
4422 ea_load_store (mem, is_store, ea_addr, data_addr);
4423 else
4424 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4426 if (ea_alias_set == -1)
4427 ea_alias_set = new_alias_set ();
4429 /* We generate a new MEM RTX to refer to the copy of the data
4430 in the cache. We do not copy memory attributes (except the
4431 alignment) from the original MEM, as they may no longer apply
4432 to the cache copy. */
4433 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4434 set_mem_alias_set (new_mem, ea_alias_set);
4435 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4437 return new_mem;
4441 spu_expand_mov (rtx * ops, machine_mode mode)
4443 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4445 /* Perform the move in the destination SUBREG's inner mode. */
4446 ops[0] = SUBREG_REG (ops[0]);
4447 mode = GET_MODE (ops[0]);
4448 ops[1] = gen_lowpart_common (mode, ops[1]);
4449 gcc_assert (ops[1]);
4452 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4454 rtx from = SUBREG_REG (ops[1]);
4455 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4457 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4458 && GET_MODE_CLASS (imode) == MODE_INT
4459 && subreg_lowpart_p (ops[1]));
4461 if (GET_MODE_SIZE (imode) < 4)
4462 imode = SImode;
4463 if (imode != GET_MODE (from))
4464 from = gen_rtx_SUBREG (imode, from, 0);
4466 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4468 enum insn_code icode = convert_optab_handler (trunc_optab,
4469 mode, imode);
4470 emit_insn (GEN_FCN (icode) (ops[0], from));
4472 else
4473 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4474 return 1;
4477 /* At least one of the operands needs to be a register. */
4478 if ((reload_in_progress | reload_completed) == 0
4479 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4481 rtx temp = force_reg (mode, ops[1]);
4482 emit_move_insn (ops[0], temp);
4483 return 1;
4485 if (reload_in_progress || reload_completed)
4487 if (CONSTANT_P (ops[1]))
4488 return spu_split_immediate (ops);
4489 return 0;
4492 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4493 extend them. */
4494 if (GET_CODE (ops[1]) == CONST_INT)
4496 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4497 if (val != INTVAL (ops[1]))
4499 emit_move_insn (ops[0], GEN_INT (val));
4500 return 1;
4503 if (MEM_P (ops[0]))
4505 if (MEM_ADDR_SPACE (ops[0]))
4506 ops[0] = expand_ea_mem (ops[0], true);
4507 return spu_split_store (ops);
4509 if (MEM_P (ops[1]))
4511 if (MEM_ADDR_SPACE (ops[1]))
4512 ops[1] = expand_ea_mem (ops[1], false);
4513 return spu_split_load (ops);
4516 return 0;
4519 static void
4520 spu_convert_move (rtx dst, rtx src)
4522 machine_mode mode = GET_MODE (dst);
4523 machine_mode int_mode = int_mode_for_mode (mode).require ();
4524 rtx reg;
4525 gcc_assert (GET_MODE (src) == TImode);
4526 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4527 emit_insn (gen_rtx_SET (reg,
4528 gen_rtx_TRUNCATE (int_mode,
4529 gen_rtx_LSHIFTRT (TImode, src,
4530 GEN_INT (int_mode == DImode ? 64 : 96)))));
4531 if (int_mode != mode)
4533 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4534 emit_move_insn (dst, reg);
4538 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4539 the address from SRC and SRC+16. Return a REG or CONST_INT that
4540 specifies how many bytes to rotate the loaded registers, plus any
4541 extra from EXTRA_ROTQBY. The address and rotate amounts are
4542 normalized to improve merging of loads and rotate computations. */
4543 static rtx
4544 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4546 rtx addr = XEXP (src, 0);
4547 rtx p0, p1, rot, addr0, addr1;
4548 int rot_amt;
4550 rot = 0;
4551 rot_amt = 0;
4553 if (MEM_ALIGN (src) >= 128)
4554 /* Address is already aligned; simply perform a TImode load. */ ;
4555 else if (GET_CODE (addr) == PLUS)
4557 /* 8 cases:
4558 aligned reg + aligned reg => lqx
4559 aligned reg + unaligned reg => lqx, rotqby
4560 aligned reg + aligned const => lqd
4561 aligned reg + unaligned const => lqd, rotqbyi
4562 unaligned reg + aligned reg => lqx, rotqby
4563 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4564 unaligned reg + aligned const => lqd, rotqby
4565 unaligned reg + unaligned const -> not allowed by legitimate address
4566 */
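/* Illustrative case, with r10 a hypothetical pointer register known to be
   16-byte aligned: the address (plus r10 7) is "aligned reg + unaligned
   const", so the code below emits an lqd of the quadword at r10 and a
   rotqbyi by 7 to bring the addressed bytes into the preferred slot.  */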
4567 p0 = XEXP (addr, 0);
4568 p1 = XEXP (addr, 1);
4569 if (!reg_aligned_for_addr (p0))
4571 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4573 rot = gen_reg_rtx (SImode);
4574 emit_insn (gen_addsi3 (rot, p0, p1));
4576 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4578 if (INTVAL (p1) > 0
4579 && REG_POINTER (p0)
4580 && INTVAL (p1) * BITS_PER_UNIT
4581 < REGNO_POINTER_ALIGN (REGNO (p0)))
4583 rot = gen_reg_rtx (SImode);
4584 emit_insn (gen_addsi3 (rot, p0, p1));
4585 addr = p0;
4587 else
4589 rtx x = gen_reg_rtx (SImode);
4590 emit_move_insn (x, p1);
4591 if (!spu_arith_operand (p1, SImode))
4592 p1 = x;
4593 rot = gen_reg_rtx (SImode);
4594 emit_insn (gen_addsi3 (rot, p0, p1));
4595 addr = gen_rtx_PLUS (Pmode, p0, x);
4598 else
4599 rot = p0;
4601 else
4603 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4605 rot_amt = INTVAL (p1) & 15;
4606 if (INTVAL (p1) & -16)
4608 p1 = GEN_INT (INTVAL (p1) & -16);
4609 addr = gen_rtx_PLUS (SImode, p0, p1);
4611 else
4612 addr = p0;
4614 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4615 rot = p1;
4618 else if (REG_P (addr))
4620 if (!reg_aligned_for_addr (addr))
4621 rot = addr;
4623 else if (GET_CODE (addr) == CONST)
4625 if (GET_CODE (XEXP (addr, 0)) == PLUS
4626 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4627 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4629 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4630 if (rot_amt & -16)
4631 addr = gen_rtx_CONST (Pmode,
4632 gen_rtx_PLUS (Pmode,
4633 XEXP (XEXP (addr, 0), 0),
4634 GEN_INT (rot_amt & -16)));
4635 else
4636 addr = XEXP (XEXP (addr, 0), 0);
4638 else
4640 rot = gen_reg_rtx (Pmode);
4641 emit_move_insn (rot, addr);
4644 else if (GET_CODE (addr) == CONST_INT)
4646 rot_amt = INTVAL (addr);
4647 addr = GEN_INT (rot_amt & -16);
4649 else if (!ALIGNED_SYMBOL_REF_P (addr))
4651 rot = gen_reg_rtx (Pmode);
4652 emit_move_insn (rot, addr);
4655 rot_amt += extra_rotby;
4657 rot_amt &= 15;
4659 if (rot && rot_amt)
4661 rtx x = gen_reg_rtx (SImode);
4662 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4663 rot = x;
4664 rot_amt = 0;
4666 if (!rot && rot_amt)
4667 rot = GEN_INT (rot_amt);
4669 addr0 = copy_rtx (addr);
4670 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4671 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4673 if (dst1)
4675 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4676 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4677 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4680 return rot;
4684 spu_split_load (rtx * ops)
4686 machine_mode mode = GET_MODE (ops[0]);
4687 rtx addr, load, rot;
4688 int rot_amt;
4690 if (GET_MODE_SIZE (mode) >= 16)
4691 return 0;
4693 addr = XEXP (ops[1], 0);
4694 gcc_assert (GET_CODE (addr) != AND);
4696 if (!address_needs_split (ops[1]))
4698 ops[1] = change_address (ops[1], TImode, addr);
4699 load = gen_reg_rtx (TImode);
4700 emit_insn (gen__movti (load, ops[1]));
4701 spu_convert_move (ops[0], load);
4702 return 1;
4705 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4707 load = gen_reg_rtx (TImode);
4708 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4710 if (rot)
4711 emit_insn (gen_rotqby_ti (load, load, rot));
4713 spu_convert_move (ops[0], load);
4714 return 1;
4718 spu_split_store (rtx * ops)
4720 machine_mode mode = GET_MODE (ops[0]);
4721 rtx reg;
4722 rtx addr, p0, p1, p1_lo, smem;
4723 int aform;
4724 int scalar;
4726 if (GET_MODE_SIZE (mode) >= 16)
4727 return 0;
4729 addr = XEXP (ops[0], 0);
4730 gcc_assert (GET_CODE (addr) != AND);
4732 if (!address_needs_split (ops[0]))
4734 reg = gen_reg_rtx (TImode);
4735 emit_insn (gen_spu_convert (reg, ops[1]));
4736 ops[0] = change_address (ops[0], TImode, addr);
4737 emit_move_insn (ops[0], reg);
4738 return 1;
4741 if (GET_CODE (addr) == PLUS)
4743 /* 8 cases:
4744 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4745 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4746 aligned reg + aligned const => lqd, c?d, shuf, stqx
4747 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4748 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4749 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4750 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4751 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4752 */
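/* Illustrative flow: each case above is a read-modify-write of the
   enclosing quadword -- load it (lqd/lqx), generate an insertion mask for
   the store width with a c?d/c?x instruction, shuffle the new value into
   place, and write the whole quadword back.  */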
4753 aform = 0;
4754 p0 = XEXP (addr, 0);
4755 p1 = p1_lo = XEXP (addr, 1);
4756 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4758 p1_lo = GEN_INT (INTVAL (p1) & 15);
4759 if (reg_aligned_for_addr (p0))
4761 p1 = GEN_INT (INTVAL (p1) & -16);
4762 if (p1 == const0_rtx)
4763 addr = p0;
4764 else
4765 addr = gen_rtx_PLUS (SImode, p0, p1);
4767 else
4769 rtx x = gen_reg_rtx (SImode);
4770 emit_move_insn (x, p1);
4771 addr = gen_rtx_PLUS (SImode, p0, x);
4775 else if (REG_P (addr))
4777 aform = 0;
4778 p0 = addr;
4779 p1 = p1_lo = const0_rtx;
4781 else
4783 aform = 1;
4784 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4785 p1 = 0; /* aform doesn't use p1 */
4786 p1_lo = addr;
4787 if (ALIGNED_SYMBOL_REF_P (addr))
4788 p1_lo = const0_rtx;
4789 else if (GET_CODE (addr) == CONST
4790 && GET_CODE (XEXP (addr, 0)) == PLUS
4791 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4792 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4794 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4795 if ((v & -16) != 0)
4796 addr = gen_rtx_CONST (Pmode,
4797 gen_rtx_PLUS (Pmode,
4798 XEXP (XEXP (addr, 0), 0),
4799 GEN_INT (v & -16)));
4800 else
4801 addr = XEXP (XEXP (addr, 0), 0);
4802 p1_lo = GEN_INT (v & 15);
4804 else if (GET_CODE (addr) == CONST_INT)
4806 p1_lo = GEN_INT (INTVAL (addr) & 15);
4807 addr = GEN_INT (INTVAL (addr) & -16);
4809 else
4811 p1_lo = gen_reg_rtx (SImode);
4812 emit_move_insn (p1_lo, addr);
4816 gcc_assert (aform == 0 || aform == 1);
4817 reg = gen_reg_rtx (TImode);
4819 scalar = store_with_one_insn_p (ops[0]);
4820 if (!scalar)
4822 /* We could copy the flags from the ops[0] MEM to mem here, but
4823 we don't because we want this load to be optimized away if
4824 possible, and copying the flags will prevent that in certain
4825 cases, e.g. consider the volatile flag. */
4827 rtx pat = gen_reg_rtx (TImode);
4828 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4829 set_mem_alias_set (lmem, 0);
4830 emit_insn (gen_movti (reg, lmem));
4832 if (!p0 || reg_aligned_for_addr (p0))
4833 p0 = stack_pointer_rtx;
4834 if (!p1_lo)
4835 p1_lo = const0_rtx;
4837 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4838 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4840 else
4842 if (GET_CODE (ops[1]) == REG)
4843 emit_insn (gen_spu_convert (reg, ops[1]));
4844 else if (GET_CODE (ops[1]) == SUBREG)
4845 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4846 else
4847 abort ();
4850 if (GET_MODE_SIZE (mode) < 4 && scalar)
4851 emit_insn (gen_ashlti3
4852 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4854 smem = change_address (ops[0], TImode, copy_rtx (addr));
4855 /* We can't use the previous alias set because the memory has changed
4856 size and can potentially overlap objects of other types. */
4857 set_mem_alias_set (smem, 0);
4859 emit_insn (gen_movti (smem, reg));
4860 return 1;
4863 /* Return TRUE if X is MEM which is a struct member reference
4864 and the member can safely be loaded and stored with a single
4865 instruction because it is padded. */
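/* Hypothetical examples: for
     struct s1 { int a __attribute__ ((aligned (16))); };
   a MEM for s1.a qualifies -- the field's alignment pads the struct, so
   the rest of a's quadword is padding -- whereas in
     struct s2 { int a __attribute__ ((aligned (16))); int b; };
   it does not, because the following field b is not 16-byte aligned.  */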
4866 static int
4867 mem_is_padded_component_ref (rtx x)
4869 tree t = MEM_EXPR (x);
4870 tree r;
4871 if (!t || TREE_CODE (t) != COMPONENT_REF)
4872 return 0;
4873 t = TREE_OPERAND (t, 1);
4874 if (!t || TREE_CODE (t) != FIELD_DECL
4875 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4876 return 0;
4877 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4878 r = DECL_FIELD_CONTEXT (t);
4879 if (!r || TREE_CODE (r) != RECORD_TYPE)
4880 return 0;
4881 /* Make sure they are the same mode */
4882 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4883 return 0;
4884 /* If there are no following fields, then the field alignment ensures
4885 the structure is padded to that alignment, which means this field is
4886 padded too. */
4887 if (TREE_CHAIN (t) == 0)
4888 return 1;
4889 /* If the following field is also aligned then this field will be
4890 padded. */
4891 t = TREE_CHAIN (t);
4892 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4893 return 1;
4894 return 0;
4897 /* Parse the -mfixed-range= option string. */
4898 static void
4899 fix_range (const char *const_str)
4901 int i, first, last;
4902 char *str, *dash, *comma;
4904 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4905 REG2 are either register names or register numbers. The effect
4906 of this option is to mark the registers in the range from REG1 to
4907 REG2 as ``fixed'' so they won't be used by the compiler. */
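/* Usage sketch (hypothetical option value): -mfixed-range=80-85,90-90
   marks registers 80 through 85 and register 90 as fixed and call-used,
   so the register allocator will never pick them.  */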
4909 i = strlen (const_str);
4910 str = (char *) alloca (i + 1);
4911 memcpy (str, const_str, i + 1);
4913 while (1)
4915 dash = strchr (str, '-');
4916 if (!dash)
4918 warning (0, "value of -mfixed-range must have form REG1-REG2");
4919 return;
4921 *dash = '\0';
4922 comma = strchr (dash + 1, ',');
4923 if (comma)
4924 *comma = '\0';
4926 first = decode_reg_name (str);
4927 if (first < 0)
4929 warning (0, "unknown register name: %s", str);
4930 return;
4933 last = decode_reg_name (dash + 1);
4934 if (last < 0)
4936 warning (0, "unknown register name: %s", dash + 1);
4937 return;
4940 *dash = '-';
4942 if (first > last)
4944 warning (0, "%s-%s is an empty range", str, dash + 1);
4945 return;
4948 for (i = first; i <= last; ++i)
4949 fixed_regs[i] = call_used_regs[i] = 1;
4951 if (!comma)
4952 break;
4954 *comma = ',';
4955 str = comma + 1;
4959 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4960 can be generated using the fsmbi instruction. */
4962 fsmbi_const_p (rtx x)
4964 if (CONSTANT_P (x))
4966 /* We can always choose TImode for CONST_INT because the high bits
4967 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4968 enum immediate_class c = classify_immediate (x, TImode);
4969 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4971 return 0;
4974 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4975 can be generated using the cbd, chd, cwd or cdd instruction. */
4977 cpat_const_p (rtx x, machine_mode mode)
4979 if (CONSTANT_P (x))
4981 enum immediate_class c = classify_immediate (x, mode);
4982 return c == IC_CPAT;
4984 return 0;
4988 gen_cpat_const (rtx * ops)
4990 unsigned char dst[16];
4991 int i, offset, shift, isize;
4992 if (GET_CODE (ops[3]) != CONST_INT
4993 || GET_CODE (ops[2]) != CONST_INT
4994 || (GET_CODE (ops[1]) != CONST_INT
4995 && GET_CODE (ops[1]) != REG))
4996 return 0;
4997 if (GET_CODE (ops[1]) == REG
4998 && (!REG_POINTER (ops[1])
4999 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5000 return 0;
5002 for (i = 0; i < 16; i++)
5003 dst[i] = i + 16;
5004 isize = INTVAL (ops[3]);
5005 if (isize == 1)
5006 shift = 3;
5007 else if (isize == 2)
5008 shift = 2;
5009 else
5010 shift = 0;
5011 offset = (INTVAL (ops[2]) +
5012 (GET_CODE (ops[1]) ==
5013 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5014 for (i = 0; i < isize; i++)
5015 dst[offset + i] = i + shift;
5016 return array_to_constant (TImode, dst);
5019 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5020 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5021 than 16 bytes, the value is repeated across the rest of the array. */
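/* Illustrative example: MODE == SImode and x == 0x01020304 produce
   { 01 02 03 04 01 02 03 04 01 02 03 04 01 02 03 04 } -- the value is
   stored most-significant byte first and then splatted to fill all
   16 bytes.  */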
5022 void
5023 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5025 HOST_WIDE_INT val;
5026 int i, j, first;
5028 memset (arr, 0, 16);
5029 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5030 if (GET_CODE (x) == CONST_INT
5031 || (GET_CODE (x) == CONST_DOUBLE
5032 && (mode == SFmode || mode == DFmode)))
5034 gcc_assert (mode != VOIDmode && mode != BLKmode);
5036 if (GET_CODE (x) == CONST_DOUBLE)
5037 val = const_double_to_hwint (x);
5038 else
5039 val = INTVAL (x);
5040 first = GET_MODE_SIZE (mode) - 1;
5041 for (i = first; i >= 0; i--)
5043 arr[i] = val & 0xff;
5044 val >>= 8;
5046 /* Splat the constant across the whole array. */
5047 for (j = 0, i = first + 1; i < 16; i++)
5049 arr[i] = arr[j];
5050 j = (j == first) ? 0 : j + 1;
5053 else if (GET_CODE (x) == CONST_DOUBLE)
5055 val = CONST_DOUBLE_LOW (x);
5056 for (i = 15; i >= 8; i--)
5058 arr[i] = val & 0xff;
5059 val >>= 8;
5061 val = CONST_DOUBLE_HIGH (x);
5062 for (i = 7; i >= 0; i--)
5064 arr[i] = val & 0xff;
5065 val >>= 8;
5068 else if (GET_CODE (x) == CONST_VECTOR)
5070 int units;
5071 rtx elt;
5072 mode = GET_MODE_INNER (mode);
5073 units = CONST_VECTOR_NUNITS (x);
5074 for (i = 0; i < units; i++)
5076 elt = CONST_VECTOR_ELT (x, i);
5077 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5079 if (GET_CODE (elt) == CONST_DOUBLE)
5080 val = const_double_to_hwint (elt);
5081 else
5082 val = INTVAL (elt);
5083 first = GET_MODE_SIZE (mode) - 1;
5084 if (first + i * GET_MODE_SIZE (mode) > 16)
5085 abort ();
5086 for (j = first; j >= 0; j--)
5088 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5089 val >>= 8;
5094 else
5095 gcc_unreachable();
5098 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5099 smaller than 16 bytes, use the bytes that would represent that value
5100 in a register, e.g., for QImode return the value of arr[3]. */
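/* Illustrative example, the inverse of the above: given the array
   { 01 02 03 04 01 02 ... }, QImode reads only arr[3] and yields
   (const_int 4), while SImode reads arr[0..3] and yields
   (const_int 0x01020304).  */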
5102 array_to_constant (machine_mode mode, const unsigned char arr[16])
5104 machine_mode inner_mode;
5105 rtvec v;
5106 int units, size, i, j, k;
5107 HOST_WIDE_INT val;
5109 if (GET_MODE_CLASS (mode) == MODE_INT
5110 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5112 j = GET_MODE_SIZE (mode);
5113 i = j < 4 ? 4 - j : 0;
5114 for (val = 0; i < j; i++)
5115 val = (val << 8) | arr[i];
5116 val = trunc_int_for_mode (val, mode);
5117 return GEN_INT (val);
5120 if (mode == TImode)
5122 HOST_WIDE_INT high;
5123 for (i = high = 0; i < 8; i++)
5124 high = (high << 8) | arr[i];
5125 for (i = 8, val = 0; i < 16; i++)
5126 val = (val << 8) | arr[i];
5127 return immed_double_const (val, high, TImode);
5129 if (mode == SFmode)
5131 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5132 val = trunc_int_for_mode (val, SImode);
5133 return hwint_to_const_double (SFmode, val);
5135 if (mode == DFmode)
5137 for (i = 0, val = 0; i < 8; i++)
5138 val = (val << 8) | arr[i];
5139 return hwint_to_const_double (DFmode, val);
5142 if (!VECTOR_MODE_P (mode))
5143 abort ();
5145 units = GET_MODE_NUNITS (mode);
5146 size = GET_MODE_UNIT_SIZE (mode);
5147 inner_mode = GET_MODE_INNER (mode);
5148 v = rtvec_alloc (units);
5150 for (k = i = 0; i < units; ++i)
5152 val = 0;
5153 for (j = 0; j < size; j++, k++)
5154 val = (val << 8) | arr[k];
5156 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5157 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5158 else
5159 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5161 if (k > 16)
5162 abort ();
5164 return gen_rtx_CONST_VECTOR (mode, v);
5167 static void
5168 reloc_diagnostic (rtx x)
5170 tree decl = 0;
5171 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5172 return;
5174 if (GET_CODE (x) == SYMBOL_REF)
5175 decl = SYMBOL_REF_DECL (x);
5176 else if (GET_CODE (x) == CONST
5177 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5178 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5180 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5181 if (decl && !DECL_P (decl))
5182 decl = 0;
5184 /* The decl could be a string constant. */
5185 if (decl && DECL_P (decl))
5187 location_t loc;
5188 /* We use last_assemble_variable_decl to get line information. It's
5189 not always going to be right and might not even be close, but will
5190 be right for the more common cases. */
5191 if (!last_assemble_variable_decl || in_section == ctors_section)
5192 loc = DECL_SOURCE_LOCATION (decl);
5193 else
5194 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5196 if (TARGET_WARN_RELOC)
5197 warning_at (loc, 0,
5198 "creating run-time relocation for %qD", decl);
5199 else
5200 error_at (loc,
5201 "creating run-time relocation for %qD", decl);
5203 else
5205 if (TARGET_WARN_RELOC)
5206 warning_at (input_location, 0, "creating run-time relocation");
5207 else
5208 error_at (input_location, "creating run-time relocation");
5212 /* Hook into assemble_integer so we can generate an error for run-time
5213 relocations. The SPU ABI disallows them. */
5214 static bool
5215 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5217 /* By default run-time relocations aren't supported, but we allow them
5218 in case users support them in their own run-time loader, and we provide
5219 a warning for those users that don't. */
5220 if ((GET_CODE (x) == SYMBOL_REF)
5221 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5222 reloc_diagnostic (x);
5224 return default_assemble_integer (x, size, aligned_p);
5227 static void
5228 spu_asm_globalize_label (FILE * file, const char *name)
5230 fputs ("\t.global\t", file);
5231 assemble_name (file, name);
5232 fputs ("\n", file);
5235 static bool
5236 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5237 int opno ATTRIBUTE_UNUSED, int *total,
5238 bool speed ATTRIBUTE_UNUSED)
5240 int code = GET_CODE (x);
5241 int cost = COSTS_N_INSNS (2);
5243 /* Folding to a CONST_VECTOR will use extra space but there might
5244 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5245 only if it allows us to fold away multiple insns. Changing the cost
5246 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5247 because this cost will only be compared against a single insn.
5248 if (code == CONST_VECTOR)
5249 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5250 */
5252 /* Use defaults for float operations. Not accurate but good enough. */
5253 if (mode == DFmode)
5255 *total = COSTS_N_INSNS (13);
5256 return true;
5258 if (mode == SFmode)
5260 *total = COSTS_N_INSNS (6);
5261 return true;
5263 switch (code)
5265 case CONST_INT:
5266 if (satisfies_constraint_K (x))
5267 *total = 0;
5268 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5269 *total = COSTS_N_INSNS (1);
5270 else
5271 *total = COSTS_N_INSNS (3);
5272 return true;
5274 case CONST:
5275 *total = COSTS_N_INSNS (3);
5276 return true;
5278 case LABEL_REF:
5279 case SYMBOL_REF:
5280 *total = COSTS_N_INSNS (0);
5281 return true;
5283 case CONST_DOUBLE:
5284 *total = COSTS_N_INSNS (5);
5285 return true;
5287 case FLOAT_EXTEND:
5288 case FLOAT_TRUNCATE:
5289 case FLOAT:
5290 case UNSIGNED_FLOAT:
5291 case FIX:
5292 case UNSIGNED_FIX:
5293 *total = COSTS_N_INSNS (7);
5294 return true;
5296 case PLUS:
5297 if (mode == TImode)
5299 *total = COSTS_N_INSNS (9);
5300 return true;
5302 break;
5304 case MULT:
5305 cost =
5306 GET_CODE (XEXP (x, 0)) ==
5307 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5308 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5310 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5312 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5313 cost = COSTS_N_INSNS (14);
5314 if ((val & 0xffff) == 0)
5315 cost = COSTS_N_INSNS (9);
5316 else if (val > 0 && val < 0x10000)
5317 cost = COSTS_N_INSNS (11);
5320 *total = cost;
5321 return true;
5322 case DIV:
5323 case UDIV:
5324 case MOD:
5325 case UMOD:
5326 *total = COSTS_N_INSNS (20);
5327 return true;
5328 case ROTATE:
5329 case ROTATERT:
5330 case ASHIFT:
5331 case ASHIFTRT:
5332 case LSHIFTRT:
5333 *total = COSTS_N_INSNS (4);
5334 return true;
5335 case UNSPEC:
5336 if (XINT (x, 1) == UNSPEC_CONVERT)
5337 *total = COSTS_N_INSNS (0);
5338 else
5339 *total = COSTS_N_INSNS (4);
5340 return true;
5342 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5343 if (GET_MODE_CLASS (mode) == MODE_INT
5344 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5345 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5346 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5347 *total = cost;
5348 return true;
5351 static scalar_int_mode
5352 spu_unwind_word_mode (void)
5354 return SImode;
5357 /* Decide whether we can make a sibling call to a function. DECL is the
5358 declaration of the function being targeted by the call and EXP is the
5359 CALL_EXPR representing the call. */
5360 static bool
5361 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5363 return decl && !TARGET_LARGE_MEM;
5366 /* We need to correctly update the back chain pointer and the Available
5367 Stack Size (which is in the second slot of the sp register). */
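/* Layout sketch, as assumed by the code below: $sp is a V4SI register
   whose slot 0 is the stack pointer and slot 1 the Available Stack Size,
   and the quadword at 0($sp) holds the back chain.  The shuffle pattern
   splats OP1 into every slot, so a single subv4si3 decrements both the
   pointer and the available size by the allocation size.  */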
5368 void
5369 spu_allocate_stack (rtx op0, rtx op1)
5371 HOST_WIDE_INT v;
5372 rtx chain = gen_reg_rtx (V4SImode);
5373 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5374 rtx sp = gen_reg_rtx (V4SImode);
5375 rtx splatted = gen_reg_rtx (V4SImode);
5376 rtx pat = gen_reg_rtx (TImode);
5378 /* copy the back chain so we can save it back again. */
5379 emit_move_insn (chain, stack_bot);
5381 op1 = force_reg (SImode, op1);
5383 v = 0x1020300010203ll;
5384 emit_move_insn (pat, immed_double_const (v, v, TImode));
5385 emit_insn (gen_shufb (splatted, op1, op1, pat));
5387 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5388 emit_insn (gen_subv4si3 (sp, sp, splatted));
5390 if (flag_stack_check || flag_stack_clash_protection)
5392 rtx avail = gen_reg_rtx(SImode);
5393 rtx result = gen_reg_rtx(SImode);
5394 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5395 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5396 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5399 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5401 emit_move_insn (stack_bot, chain);
5403 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5406 void
5407 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5409 static unsigned char arr[16] =
5410 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5411 rtx temp = gen_reg_rtx (SImode);
5412 rtx temp2 = gen_reg_rtx (SImode);
5413 rtx temp3 = gen_reg_rtx (V4SImode);
5414 rtx temp4 = gen_reg_rtx (V4SImode);
5415 rtx pat = gen_reg_rtx (TImode);
5416 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5418 /* Restore the backchain from the first word, sp from the second. */
5419 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5420 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5422 emit_move_insn (pat, array_to_constant (TImode, arr));
5424 /* Compute Available Stack Size for sp */
5425 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5426 emit_insn (gen_shufb (temp3, temp, temp, pat));
5428 /* Compute Available Stack Size for back chain */
5429 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5430 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5431 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5433 emit_insn (gen_addv4si3 (sp, sp, temp3));
5434 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5437 static void
5438 spu_init_libfuncs (void)
5440 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5441 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5442 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5443 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5444 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5445 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5446 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5447 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5448 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5449 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5450 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5451 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5453 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5454 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5456 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5457 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5458 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5459 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5460 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5461 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5462 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5463 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5464 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5465 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5466 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5467 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5469 set_optab_libfunc (smul_optab, TImode, "__multi3");
5470 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5471 set_optab_libfunc (smod_optab, TImode, "__modti3");
5472 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5473 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5474 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5477 /* Make a subreg, stripping any existing subreg. We could possibly just
5478 call simplify_subreg, but in this case we know what we want. */
5480 spu_gen_subreg (machine_mode mode, rtx x)
5482 if (GET_CODE (x) == SUBREG)
5483 x = SUBREG_REG (x);
5484 if (GET_MODE (x) == mode)
5485 return x;
5486 return gen_rtx_SUBREG (mode, x, 0);
5489 static bool
5490 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5492 return (TYPE_MODE (type) == BLKmode
5493 && ((type) == 0
5494 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5495 || int_size_in_bytes (type) >
5496 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5499 /* Create the built-in types and functions */
5501 enum spu_function_code
5503 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5504 #include "spu-builtins.def"
5505 #undef DEF_BUILTIN
5506 NUM_SPU_BUILTINS
5509 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5511 struct spu_builtin_description spu_builtins[] = {
5512 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5513 {fcode, icode, name, type, params},
5514 #include "spu-builtins.def"
5515 #undef DEF_BUILTIN
5518 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5520 /* Returns the spu builtin decl for CODE. */
5522 static tree
5523 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5525 if (code >= NUM_SPU_BUILTINS)
5526 return error_mark_node;
5528 return spu_builtin_decls[code];
5532 static void
5533 spu_init_builtins (void)
5535 struct spu_builtin_description *d;
5536 unsigned int i;
5538 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5539 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5540 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5541 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5542 V4SF_type_node = build_vector_type (float_type_node, 4);
5543 V2DF_type_node = build_vector_type (double_type_node, 2);
5545 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5546 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5547 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5548 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5550 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5552 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5558 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5560 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5561 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5562 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5563 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5565 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5566 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5567 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5568 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5569 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5570 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5571 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5572 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5574 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5575 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5577 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5579 spu_builtin_types[SPU_BTI_PTR] =
5580 build_pointer_type (build_qualified_type
5581 (void_type_node,
5582 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5584 /* For each builtin we build a new prototype. The tree code will make
5585 sure nodes are shared. */
5586 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5588 tree p;
5589 char name[64]; /* build_function will make a copy. */
5590 int parm;
5592 if (d->name == 0)
5593 continue;
5595 /* Find last parm. */
5596 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5599 p = void_list_node;
5600 while (parm > 1)
5601 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5603 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5605 sprintf (name, "__builtin_%s", d->name);
5606 spu_builtin_decls[i] =
5607 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5608 if (d->fcode == SPU_MASK_FOR_LOAD)
5609 TREE_READONLY (spu_builtin_decls[i]) = 1;
5611 /* These builtins don't throw. */
5612 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5616 void
5617 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5619 static unsigned char arr[16] =
5620 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5622 rtx temp = gen_reg_rtx (Pmode);
5623 rtx temp2 = gen_reg_rtx (V4SImode);
5624 rtx temp3 = gen_reg_rtx (V4SImode);
5625 rtx pat = gen_reg_rtx (TImode);
5626 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5628 emit_move_insn (pat, array_to_constant (TImode, arr));
5630 /* Restore the sp. */
5631 emit_move_insn (temp, op1);
5632 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5634 /* Compute available stack size for sp. */
5635 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5636 emit_insn (gen_shufb (temp3, temp, temp, pat));
5638 emit_insn (gen_addv4si3 (sp, sp, temp3));
5639 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5643 spu_safe_dma (HOST_WIDE_INT channel)
5645 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5648 void
5649 spu_builtin_splats (rtx ops[])
5651 machine_mode mode = GET_MODE (ops[0]);
5652 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5654 unsigned char arr[16];
5655 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5656 emit_move_insn (ops[0], array_to_constant (mode, arr));
5658 else
5660 rtx reg = gen_reg_rtx (TImode);
5661 rtx shuf;
5662 if (GET_CODE (ops[1]) != REG
5663 && GET_CODE (ops[1]) != SUBREG)
5664 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5665 switch (mode)
5667 case E_V2DImode:
5668 case E_V2DFmode:
5669 shuf =
5670 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5671 TImode);
5672 break;
5673 case E_V4SImode:
5674 case E_V4SFmode:
5675 shuf =
5676 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5677 TImode);
5678 break;
5679 case E_V8HImode:
5680 shuf =
5681 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5682 TImode);
5683 break;
5684 case E_V16QImode:
5685 shuf =
5686 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5687 TImode);
5688 break;
5689 default:
5690 abort ();
5692 emit_move_insn (reg, shuf);
5693 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5697 void
5698 spu_builtin_extract (rtx ops[])
5700 machine_mode mode;
5701 rtx rot, from, tmp;
5703 mode = GET_MODE (ops[1]);
5705 if (GET_CODE (ops[2]) == CONST_INT)
5707 switch (mode)
5709 case E_V16QImode:
5710 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5711 break;
5712 case E_V8HImode:
5713 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5714 break;
5715 case E_V4SFmode:
5716 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5717 break;
5718 case E_V4SImode:
5719 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5720 break;
5721 case E_V2DImode:
5722 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5723 break;
5724 case E_V2DFmode:
5725 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5726 break;
5727 default:
5728 abort ();
5730 return;
5733 from = spu_gen_subreg (TImode, ops[1]);
5734 rot = gen_reg_rtx (TImode);
5735 tmp = gen_reg_rtx (SImode);
5737 switch (mode)
5739 case E_V16QImode:
5740 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5741 break;
5742 case E_V8HImode:
5743 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5744 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5745 break;
5746 case E_V4SFmode:
5747 case E_V4SImode:
5748 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5749 break;
5750 case E_V2DImode:
5751 case E_V2DFmode:
5752 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5753 break;
5754 default:
5755 abort ();
5757 emit_insn (gen_rotqby_ti (rot, from, tmp));
5759 emit_insn (gen_spu_convert (ops[0], rot));
5762 void
5763 spu_builtin_insert (rtx ops[])
5765 machine_mode mode = GET_MODE (ops[0]);
5766 machine_mode imode = GET_MODE_INNER (mode);
5767 rtx mask = gen_reg_rtx (TImode);
5768 rtx offset;
5770 if (GET_CODE (ops[3]) == CONST_INT)
5771 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5772 else
5774 offset = gen_reg_rtx (SImode);
5775 emit_insn (gen_mulsi3
5776 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5778 emit_insn (gen_cpat
5779 (mask, stack_pointer_rtx, offset,
5780 GEN_INT (GET_MODE_SIZE (imode))));
5781 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5784 void
5785 spu_builtin_promote (rtx ops[])
5787 machine_mode mode, imode;
5788 rtx rot, from, offset;
5789 HOST_WIDE_INT pos;
5791 mode = GET_MODE (ops[0]);
5792 imode = GET_MODE_INNER (mode);
5794 from = gen_reg_rtx (TImode);
5795 rot = spu_gen_subreg (TImode, ops[0]);
5797 emit_insn (gen_spu_convert (from, ops[1]));
5799 if (GET_CODE (ops[2]) == CONST_INT)
5801 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5802 if (GET_MODE_SIZE (imode) < 4)
5803 pos += 4 - GET_MODE_SIZE (imode);
5804 offset = GEN_INT (pos & 15);
5806 else
5808 offset = gen_reg_rtx (SImode);
5809 switch (mode)
5811 case E_V16QImode:
5812 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5813 break;
5814 case E_V8HImode:
5815 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5816 emit_insn (gen_addsi3 (offset, offset, offset));
5817 break;
5818 case E_V4SFmode:
5819 case E_V4SImode:
5820 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5821 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5822 break;
5823 case E_V2DImode:
5824 case E_V2DFmode:
5825 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5826 break;
5827 default:
5828 abort ();
5831 emit_insn (gen_rotqby_ti (rot, from, offset));
5834 static void
5835 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5837 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5838 rtx shuf = gen_reg_rtx (V4SImode);
5839 rtx insn = gen_reg_rtx (V4SImode);
5840 rtx shufc;
5841 rtx insnc;
5842 rtx mem;
5844 fnaddr = force_reg (SImode, fnaddr);
5845 cxt = force_reg (SImode, cxt);
5847 if (TARGET_LARGE_MEM)
5849 rtx rotl = gen_reg_rtx (V4SImode);
5850 rtx mask = gen_reg_rtx (V4SImode);
5851 rtx bi = gen_reg_rtx (SImode);
5852 static unsigned char const shufa[16] = {
5853 2, 3, 0, 1, 18, 19, 16, 17,
5854 0, 1, 2, 3, 16, 17, 18, 19
5856 static unsigned char const insna[16] = {
5857 0x41, 0, 0, 79,
5858 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5859 0x60, 0x80, 0, 79,
5860 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5863 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5864 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5866 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5867 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5868 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5869 emit_insn (gen_selb (insn, insnc, rotl, mask));
5871 mem = adjust_address (m_tramp, V4SImode, 0);
5872 emit_move_insn (mem, insn);
5874 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5875 mem = adjust_address (m_tramp, Pmode, 16);
5876 emit_move_insn (mem, bi);
5878 else
5880 rtx scxt = gen_reg_rtx (SImode);
5881 rtx sfnaddr = gen_reg_rtx (SImode);
5882 static unsigned char const insna[16] = {
5883 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5884 0x30, 0, 0, 0,
5885 0, 0, 0, 0,
5886 0, 0, 0, 0
5889 shufc = gen_reg_rtx (TImode);
5890 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5892 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5893 fits 18 bits and the last 4 are zeros. This will be true if
5894 the stack pointer is initialized to 0x3fff0 at program start;
5895 otherwise the ila instruction will be garbage. */
5897 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5898 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5899 emit_insn (gen_cpat
5900 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5901 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5902 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5904 mem = adjust_address (m_tramp, V4SImode, 0);
5905 emit_move_insn (mem, insn);
5907 emit_insn (gen_sync ());
5910 static bool
5911 spu_warn_func_return (tree decl)
5913 /* Naked functions are implemented entirely in assembly, including the
5914 return sequence, so suppress warnings about this. */
5915 return !spu_naked_function_p (decl);
5918 void
5919 spu_expand_sign_extend (rtx ops[])
5921 unsigned char arr[16];
5922 rtx pat = gen_reg_rtx (TImode);
5923 rtx sign, c;
5924 int i, last;
5925 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5926 if (GET_MODE (ops[1]) == QImode)
5928 sign = gen_reg_rtx (HImode);
5929 emit_insn (gen_extendqihi2 (sign, ops[1]));
5930 for (i = 0; i < 16; i++)
5931 arr[i] = 0x12;
5932 arr[last] = 0x13;
5934 else
5936 for (i = 0; i < 16; i++)
5937 arr[i] = 0x10;
5938 switch (GET_MODE (ops[1]))
5940 case E_HImode:
5941 sign = gen_reg_rtx (SImode);
5942 emit_insn (gen_extendhisi2 (sign, ops[1]));
5943 arr[last] = 0x03;
5944 arr[last - 1] = 0x02;
5945 break;
5946 case E_SImode:
5947 sign = gen_reg_rtx (SImode);
5948 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5949 for (i = 0; i < 4; i++)
5950 arr[last - i] = 3 - i;
5951 break;
5952 case E_DImode:
5953 sign = gen_reg_rtx (SImode);
5954 c = gen_reg_rtx (SImode);
5955 emit_insn (gen_spu_convert (c, ops[1]));
5956 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5957 for (i = 0; i < 8; i++)
5958 arr[last - i] = 7 - i;
5959 break;
5960 default:
5961 abort ();
5964 emit_move_insn (pat, array_to_constant (TImode, arr));
5965 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5968 /* Expand vector initialization. If there are any constant parts,
5969 load the constant parts first, then load any non-constant parts. */
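/* Illustrative example: a V4SI initializer { 7, 7, 7, 7 } becomes a
   single splat; { 1, x, 2, 3 } with x non-constant first loads the
   constant vector { 1, 1, 2, 3 } (the variable slot filled with the first
   constant so splats stay possible) and then inserts x into element 1.  */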
5970 void
5971 spu_expand_vector_init (rtx target, rtx vals)
5973 machine_mode mode = GET_MODE (target);
5974 int n_elts = GET_MODE_NUNITS (mode);
5975 int n_var = 0;
5976 bool all_same = true;
5977 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5978 int i;
5980 first = XVECEXP (vals, 0, 0);
5981 for (i = 0; i < n_elts; ++i)
5983 x = XVECEXP (vals, 0, i);
5984 if (!(CONST_INT_P (x)
5985 || GET_CODE (x) == CONST_DOUBLE
5986 || GET_CODE (x) == CONST_FIXED))
5987 ++n_var;
5988 else
5990 if (first_constant == NULL_RTX)
5991 first_constant = x;
5993 if (i > 0 && !rtx_equal_p (x, first))
5994 all_same = false;
5997 /* if all elements are the same, use splats to repeat elements */
5998 if (all_same)
6000 if (!CONSTANT_P (first)
6001 && !register_operand (first, GET_MODE (x)))
6002 first = force_reg (GET_MODE (first), first);
6003 emit_insn (gen_spu_splats (target, first));
6004 return;
6007 /* load constant parts */
6008 if (n_var != n_elts)
6010 if (n_var == 0)
6012 emit_move_insn (target,
6013 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6015 else
6017 rtx constant_parts_rtx = copy_rtx (vals);
6019 gcc_assert (first_constant != NULL_RTX);
6020 /* Fill empty slots with the first constant; this increases
6021 our chance of using splats in the recursive call below. */
6022 for (i = 0; i < n_elts; ++i)
6024 x = XVECEXP (constant_parts_rtx, 0, i);
6025 if (!(CONST_INT_P (x)
6026 || GET_CODE (x) == CONST_DOUBLE
6027 || GET_CODE (x) == CONST_FIXED))
6028 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6031 spu_expand_vector_init (target, constant_parts_rtx);
6035 /* load variable parts */
6036 if (n_var != 0)
6038 rtx insert_operands[4];
6040 insert_operands[0] = target;
6041 insert_operands[2] = target;
6042 for (i = 0; i < n_elts; ++i)
6044 x = XVECEXP (vals, 0, i);
6045 if (!(CONST_INT_P (x)
6046 || GET_CODE (x) == CONST_DOUBLE
6047 || GET_CODE (x) == CONST_FIXED))
6049 if (!register_operand (x, GET_MODE (x)))
6050 x = force_reg (GET_MODE (x), x);
6051 insert_operands[1] = x;
6052 insert_operands[3] = GEN_INT (i);
6053 spu_builtin_insert (insert_operands);
6059 /* Return the insn code for the vector compare instruction for the given
6060 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6062 static int
6063 get_vec_cmp_insn (enum rtx_code code,
6064 machine_mode dest_mode,
6065 machine_mode op_mode)
6068 switch (code)
6070 case EQ:
6071 if (dest_mode == V16QImode && op_mode == V16QImode)
6072 return CODE_FOR_ceq_v16qi;
6073 if (dest_mode == V8HImode && op_mode == V8HImode)
6074 return CODE_FOR_ceq_v8hi;
6075 if (dest_mode == V4SImode && op_mode == V4SImode)
6076 return CODE_FOR_ceq_v4si;
6077 if (dest_mode == V4SImode && op_mode == V4SFmode)
6078 return CODE_FOR_ceq_v4sf;
6079 if (dest_mode == V2DImode && op_mode == V2DFmode)
6080 return CODE_FOR_ceq_v2df;
6081 break;
6082 case GT:
6083 if (dest_mode == V16QImode && op_mode == V16QImode)
6084 return CODE_FOR_cgt_v16qi;
6085 if (dest_mode == V8HImode && op_mode == V8HImode)
6086 return CODE_FOR_cgt_v8hi;
6087 if (dest_mode == V4SImode && op_mode == V4SImode)
6088 return CODE_FOR_cgt_v4si;
6089 if (dest_mode == V4SImode && op_mode == V4SFmode)
6090 return CODE_FOR_cgt_v4sf;
6091 if (dest_mode == V2DImode && op_mode == V2DFmode)
6092 return CODE_FOR_cgt_v2df;
6093 break;
6094 case GTU:
6095 if (dest_mode == V16QImode && op_mode == V16QImode)
6096 return CODE_FOR_clgt_v16qi;
6097 if (dest_mode == V8HImode && op_mode == V8HImode)
6098 return CODE_FOR_clgt_v8hi;
6099 if (dest_mode == V4SImode && op_mode == V4SImode)
6100 return CODE_FOR_clgt_v4si;
6101 break;
6102 default:
6103 break;
6105 return -1;
6108 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6109 DMODE is expected destination mode. This is a recursive function. */
6111 static rtx
6112 spu_emit_vector_compare (enum rtx_code rcode,
6113 rtx op0, rtx op1,
6114 machine_mode dmode)
6116 int vec_cmp_insn;
6117 rtx mask;
6118 machine_mode dest_mode;
6119 machine_mode op_mode = GET_MODE (op1);
6121 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6123   /* Floating-point vector compare instructions use a V4SImode destination.
6124      Double-precision floating-point vector compare instructions use a V2DImode destination.
6125      Move the destination to the appropriate mode later.  */
6126 if (dmode == V4SFmode)
6127 dest_mode = V4SImode;
6128 else if (dmode == V2DFmode)
6129 dest_mode = V2DImode;
6130 else
6131 dest_mode = dmode;
6133 mask = gen_reg_rtx (dest_mode);
6134 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6136 if (vec_cmp_insn == -1)
6138 bool swap_operands = false;
6139 bool try_again = false;
6140 switch (rcode)
6142 case LT:
6143 rcode = GT;
6144 swap_operands = true;
6145 try_again = true;
6146 break;
6147 case LTU:
6148 rcode = GTU;
6149 swap_operands = true;
6150 try_again = true;
6151 break;
6152 case NE:
6153 case UNEQ:
6154 case UNLE:
6155 case UNLT:
6156 case UNGE:
6157 case UNGT:
6158 case UNORDERED:
6159 /* Treat A != B as ~(A==B). */
6161 enum rtx_code rev_code;
6162 enum insn_code nor_code;
6163 rtx rev_mask;
6165 rev_code = reverse_condition_maybe_unordered (rcode);
6166 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6168 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6169 gcc_assert (nor_code != CODE_FOR_nothing);
6170 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6171 if (dmode != dest_mode)
6173 rtx temp = gen_reg_rtx (dest_mode);
6174 convert_move (temp, mask, 0);
6175 return temp;
6177 return mask;
6179 break;
6180 case GE:
6181 case GEU:
6182 case LE:
6183 case LEU:
6184 /* Try GT/GTU/LT/LTU OR EQ */
6186 rtx c_rtx, eq_rtx;
6187 enum insn_code ior_code;
6188 enum rtx_code new_code;
6190 switch (rcode)
6192 case GE: new_code = GT; break;
6193 case GEU: new_code = GTU; break;
6194 case LE: new_code = LT; break;
6195 case LEU: new_code = LTU; break;
6196 default:
6197 gcc_unreachable ();
6200 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6201 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6203 ior_code = optab_handler (ior_optab, dest_mode);
6204 gcc_assert (ior_code != CODE_FOR_nothing);
6205 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6206 if (dmode != dest_mode)
6208 rtx temp = gen_reg_rtx (dest_mode);
6209 convert_move (temp, mask, 0);
6210 return temp;
6212 return mask;
6214 break;
6215 case LTGT:
6216 /* Try LT OR GT */
6218 rtx lt_rtx, gt_rtx;
6219 enum insn_code ior_code;
6221 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6222 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6224 ior_code = optab_handler (ior_optab, dest_mode);
6225 gcc_assert (ior_code != CODE_FOR_nothing);
6226 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6227 if (dmode != dest_mode)
6229 rtx temp = gen_reg_rtx (dest_mode);
6230 convert_move (temp, mask, 0);
6231 return temp;
6233 return mask;
6235 break;
6236 case ORDERED:
6237 /* Implement as (A==A) & (B==B) */
6239 rtx a_rtx, b_rtx;
6240 enum insn_code and_code;
6242 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6243 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6245 and_code = optab_handler (and_optab, dest_mode);
6246 gcc_assert (and_code != CODE_FOR_nothing);
6247 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6248 if (dmode != dest_mode)
6250 rtx temp = gen_reg_rtx (dest_mode);
6251 convert_move (temp, mask, 0);
6252 return temp;
6254 return mask;
6256 break;
6257 default:
6258 gcc_unreachable ();
6261 /* You only get two chances. */
6262 if (try_again)
6263 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6265 gcc_assert (vec_cmp_insn != -1);
6267 if (swap_operands)
6269 rtx tmp;
6270 tmp = op0;
6271 op0 = op1;
6272 op1 = tmp;
6276 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6277 if (dmode != dest_mode)
6279 rtx temp = gen_reg_rtx (dest_mode);
6280 convert_move (temp, mask, 0);
6281 return temp;
6283 return mask;
6287 /* Emit vector conditional expression.
6288 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6289 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6292 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6293 rtx cond, rtx cc_op0, rtx cc_op1)
6295 machine_mode dest_mode = GET_MODE (dest);
6296 enum rtx_code rcode = GET_CODE (cond);
6297 rtx mask;
6299   /* Get the vector mask for the given relational operation.  */
6300   mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6302   emit_insn (gen_selb (dest, op2, op1, mask));
6304 return 1;
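/* Load OP into a register of mode MODE.  Values without a useful mode
   (CONST_INTs and BLKmode rtxes) are converted to MODE first.  If OP
   already has a mode of the same size, reinterpret it with a subreg where
   possible; otherwise move it via gen_spu_convert.  */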
6307 static rtx
6308 spu_force_reg (machine_mode mode, rtx op)
6310 rtx x, r;
6311 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6313 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6314 || GET_MODE (op) == BLKmode)
6315 return force_reg (mode, convert_to_mode (mode, op, 0));
6316 abort ();
6319 r = force_reg (GET_MODE (op), op);
6320 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6322 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6323 if (x)
6324 return x;
6327 x = gen_reg_rtx (mode);
6328 emit_insn (gen_spu_convert (x, r));
6329 return x;
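/* Check operand OP of builtin D against its parameter type P.  For
   immediate parameter types, verify that the value is a constant in the
   expected range and warn when low-order bits that the insn ignores are
   set.  */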
6332 static void
6333 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6335 HOST_WIDE_INT v = 0;
6336 int lsbits;
6337 /* Check the range of immediate operands. */
6338 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6340 int range = p - SPU_BTI_7;
6342 if (!CONSTANT_P (op))
6343 error ("%s expects an integer literal in the range [%d, %d]",
6344 d->name,
6345 spu_builtin_range[range].low, spu_builtin_range[range].high);
6347 if (GET_CODE (op) == CONST
6348 && (GET_CODE (XEXP (op, 0)) == PLUS
6349 || GET_CODE (XEXP (op, 0)) == MINUS))
6351 v = INTVAL (XEXP (XEXP (op, 0), 1));
6352 op = XEXP (XEXP (op, 0), 0);
6354 else if (GET_CODE (op) == CONST_INT)
6355 v = INTVAL (op);
6356 else if (GET_CODE (op) == CONST_VECTOR
6357 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6358 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6360       /* The default for V is 0, which is valid in every range.  */
6361 if (v < spu_builtin_range[range].low
6362 || v > spu_builtin_range[range].high)
6363 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6364 d->name,
6365 spu_builtin_range[range].low, spu_builtin_range[range].high,
6368 switch (p)
6370 case SPU_BTI_S10_4:
6371 lsbits = 4;
6372 break;
6373 case SPU_BTI_U16_2:
6374       /* This is only used in lqa and stqa.  Even though the insns
6375 	 encode 16 bits of the address (all but the 2 least
6376 	 significant), only 14 bits are used because it is masked to
6377 	 be 16-byte aligned.  */
6378 lsbits = 4;
6379 break;
6380 case SPU_BTI_S16_2:
6381 /* This is used for lqr and stqr. */
6382 lsbits = 2;
6383 break;
6384 default:
6385 lsbits = 0;
6388 if (GET_CODE (op) == LABEL_REF
6389 || (GET_CODE (op) == SYMBOL_REF
6390 && SYMBOL_REF_FUNCTION_P (op))
6391 || (v & ((1 << lsbits) - 1)) != 0)
6392 warning (0, "%d least significant bits of %s are ignored", lsbits,
6393 d->name);
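/* Expand the arguments of the builtin call EXP into OPS[], placing TARGET
   in OPS[0] when builtin D returns a value.  Return the number of operands
   filled in.  */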
6398 static int
6399 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6400 rtx target, rtx ops[])
6402 enum insn_code icode = (enum insn_code) d->icode;
6403 int i = 0, a;
6405 /* Expand the arguments into rtl. */
6407 if (d->parm[0] != SPU_BTI_VOID)
6408 ops[i++] = target;
6410 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6412 tree arg = CALL_EXPR_ARG (exp, a);
6413 if (arg == 0)
6414 abort ();
6415 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6418 gcc_assert (i == insn_data[icode].n_generator_args);
6419 return i;
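/* Expand the call EXP to the builtin described by D.  TARGET is a
   suggested place for the result.  Operands are checked against the
   builtin's parameter types and forced into the modes the insn pattern
   expects before it is emitted.  Return the result rtx, or 0.  */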
6422 static rtx
6423 spu_expand_builtin_1 (struct spu_builtin_description *d,
6424 tree exp, rtx target)
6426 rtx pat;
6427 rtx ops[8];
6428 enum insn_code icode = (enum insn_code) d->icode;
6429 machine_mode mode, tmode;
6430 int i, p;
6431 int n_operands;
6432 tree return_type;
6434 /* Set up ops[] with values from arglist. */
6435 n_operands = expand_builtin_args (d, exp, target, ops);
6437 /* Handle the target operand which must be operand 0. */
6438 i = 0;
6439 if (d->parm[0] != SPU_BTI_VOID)
6442       /* We prefer the mode specified for the match_operand; otherwise
6443 	 we use the mode from the builtin function prototype.  */
6444 tmode = insn_data[d->icode].operand[0].mode;
6445 if (tmode == VOIDmode)
6446 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6448       /* Try to use TARGET, because not using it can lead to extra copies,
6449 	 and when all of the registers are in use, extra copies lead
6450 	 to extra spills.  */
6451 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6452 ops[0] = target;
6453 else
6454 target = ops[0] = gen_reg_rtx (tmode);
6456 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6457 abort ();
6459 i++;
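  /* SPU_MASK_FOR_LOAD (the builtin returned by spu_builtin_mask_for_load
     below) is expanded specially: the mask insn is given a MEM of the
     negated load address instead of going through the generic operand
     handling below.  */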
6462 if (d->fcode == SPU_MASK_FOR_LOAD)
6464 machine_mode mode = insn_data[icode].operand[1].mode;
6465 tree arg;
6466 rtx addr, op, pat;
6468 /* get addr */
6469 arg = CALL_EXPR_ARG (exp, 0);
6470 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6471 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6472 addr = memory_address (mode, op);
6474 /* negate addr */
6475 op = gen_reg_rtx (GET_MODE (addr));
6476 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6477 op = gen_rtx_MEM (mode, op);
6479 pat = GEN_FCN (icode) (target, op);
6480 if (!pat)
6481 return 0;
6482 emit_insn (pat);
6483 return target;
6486   /* Ignore align_hint, but still expand its args in case they have
6487      side effects.  */
6488 if (icode == CODE_FOR_spu_align_hint)
6489 return 0;
6491 /* Handle the rest of the operands. */
6492 for (p = 1; i < n_operands; i++, p++)
6494 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6495 mode = insn_data[d->icode].operand[i].mode;
6496 else
6497 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6499 /* mode can be VOIDmode here for labels */
6501 /* For specific intrinsics with an immediate operand, e.g.,
6502 si_ai(), we sometimes need to convert the scalar argument to a
6503 vector argument by splatting the scalar. */
6504 if (VECTOR_MODE_P (mode)
6505 && (GET_CODE (ops[i]) == CONST_INT
6506 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6507 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6509 if (GET_CODE (ops[i]) == CONST_INT)
6510 ops[i] = spu_const (mode, INTVAL (ops[i]));
6511 else
6513 rtx reg = gen_reg_rtx (mode);
6514 machine_mode imode = GET_MODE_INNER (mode);
6515 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6516 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6517 if (imode != GET_MODE (ops[i]))
6518 ops[i] = convert_to_mode (imode, ops[i],
6519 TYPE_UNSIGNED (spu_builtin_types
6520 [d->parm[i]]));
6521 emit_insn (gen_spu_splats (reg, ops[i]));
6522 ops[i] = reg;
6526 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6528 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6529 ops[i] = spu_force_reg (mode, ops[i]);
6532 switch (n_operands)
6534 case 0:
6535 pat = GEN_FCN (icode) (0);
6536 break;
6537 case 1:
6538 pat = GEN_FCN (icode) (ops[0]);
6539 break;
6540 case 2:
6541 pat = GEN_FCN (icode) (ops[0], ops[1]);
6542 break;
6543 case 3:
6544 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6545 break;
6546 case 4:
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6548 break;
6549 case 5:
6550 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6551 break;
6552 case 6:
6553 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6554 break;
6555 default:
6556 abort ();
6559 if (!pat)
6560 abort ();
6562 if (d->type == B_CALL || d->type == B_BISLED)
6563 emit_call_insn (pat);
6564 else if (d->type == B_JUMP)
6566 emit_jump_insn (pat);
6567 emit_barrier ();
6569 else
6570 emit_insn (pat);
6572 return_type = spu_builtin_types[d->parm[0]];
6573 if (d->parm[0] != SPU_BTI_VOID
6574 && GET_MODE (target) != TYPE_MODE (return_type))
6576       /* TARGET is the return value.  It should always have the mode of
6577 	 the builtin function prototype.  */
6578 target = spu_force_reg (TYPE_MODE (return_type), target);
6581 return target;
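/* Implement TARGET_EXPAND_BUILTIN.  */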
6585 spu_expand_builtin (tree exp,
6586 rtx target,
6587 rtx subtarget ATTRIBUTE_UNUSED,
6588 machine_mode mode ATTRIBUTE_UNUSED,
6589 int ignore ATTRIBUTE_UNUSED)
6591 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6592 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6593 struct spu_builtin_description *d;
6595 if (fcode < NUM_SPU_BUILTINS)
6597 d = &spu_builtins[fcode];
6599 return spu_expand_builtin_1 (d, exp, target);
6601 abort ();
6604 /* Implement targetm.vectorize.builtin_mask_for_load. */
6605 static tree
6606 spu_builtin_mask_for_load (void)
6608 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6611 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6612 static int
6613 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6614 tree vectype,
6615 int misalign ATTRIBUTE_UNUSED)
6617 unsigned elements;
6619 switch (type_of_cost)
6621 case scalar_stmt:
6622 case vector_stmt:
6623 case vector_load:
6624 case vector_store:
6625 case vec_to_scalar:
6626 case scalar_to_vec:
6627 case cond_branch_not_taken:
6628 case vec_perm:
6629 case vec_promote_demote:
6630 return 1;
6632 case scalar_store:
6633 return 10;
6635 case scalar_load:
6636 /* Load + rotate. */
6637 return 2;
6639 case unaligned_load:
6640 case vector_gather_load:
6641 case vector_scatter_store:
6642 return 2;
6644 case cond_branch_taken:
6645 return 6;
6647 case vec_construct:
6648 elements = TYPE_VECTOR_SUBPARTS (vectype);
6649 return elements / 2 + 1;
6651 default:
6652 gcc_unreachable ();
6656 /* Implement targetm.vectorize.init_cost. */
6658 static void *
6659 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6661 unsigned *cost = XNEWVEC (unsigned, 3);
6662 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6663 return cost;
6666 /* Implement targetm.vectorize.add_stmt_cost. */
6668 static unsigned
6669 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6670 struct _stmt_vec_info *stmt_info, int misalign,
6671 enum vect_cost_model_location where)
6673 unsigned *cost = (unsigned *) data;
6674 unsigned retval = 0;
6676 if (flag_vect_cost_model)
6678 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6679 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6681 /* Statements in an inner loop relative to the loop being
6682 vectorized are weighted more heavily. The value here is
6683 arbitrary and could potentially be improved with analysis. */
6684 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6685 count *= 50; /* FIXME. */
6687 retval = (unsigned) (count * stmt_cost);
6688 cost[where] += retval;
6691 return retval;
6694 /* Implement targetm.vectorize.finish_cost. */
6696 static void
6697 spu_finish_cost (void *data, unsigned *prologue_cost,
6698 unsigned *body_cost, unsigned *epilogue_cost)
6700 unsigned *cost = (unsigned *) data;
6701 *prologue_cost = cost[vect_prologue];
6702 *body_cost = cost[vect_body];
6703 *epilogue_cost = cost[vect_epilogue];
6706 /* Implement targetm.vectorize.destroy_cost_data. */
6708 static void
6709 spu_destroy_cost_data (void *data)
6711 free (data);
6714 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6715    after applying N iterations.  This routine does not determine
6716    how many iterations are required to reach the desired alignment.  */
6718 static bool
6719 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6721 if (is_packed)
6722 return false;
6724 /* All other types are naturally aligned. */
6725 return true;
6728 /* Return the appropriate mode for a named address pointer. */
6729 static scalar_int_mode
6730 spu_addr_space_pointer_mode (addr_space_t addrspace)
6732 switch (addrspace)
6734 case ADDR_SPACE_GENERIC:
6735 return ptr_mode;
6736 case ADDR_SPACE_EA:
6737 return EAmode;
6738 default:
6739 gcc_unreachable ();
6743 /* Return the appropriate mode for an address in a named address space.  */
6744 static scalar_int_mode
6745 spu_addr_space_address_mode (addr_space_t addrspace)
6747 switch (addrspace)
6749 case ADDR_SPACE_GENERIC:
6750 return Pmode;
6751 case ADDR_SPACE_EA:
6752 return EAmode;
6753 default:
6754 gcc_unreachable ();
6758 /* Determine if one named address space is a subset of another. */
6760 static bool
6761 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6763 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6764 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6766 if (subset == superset)
6767 return true;
6769 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6770 being subsets but instead as disjoint address spaces. */
6771 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6772 return false;
6774 else
6775 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6778 /* Convert from one address space to another. */
6779 static rtx
6780 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6782 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6783 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6785 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6786 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
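  /* As implemented below, a generic (local-store) pointer becomes an __ea
     pointer by adding the value of __ea_local_store, and an __ea pointer is
     converted back by subtracting it; the conditional moves on LS keep a
     null pointer null in both directions.  */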
6788 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6790 rtx result, ls;
6792 ls = gen_const_mem (DImode,
6793 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6794 set_mem_align (ls, 128);
6796 result = gen_reg_rtx (Pmode);
6797 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6798 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6799 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6800 ls, const0_rtx, Pmode, 1);
6802 emit_insn (gen_subsi3 (result, op, ls));
6804 return result;
6807 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6809 rtx result, ls;
6811 ls = gen_const_mem (DImode,
6812 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6813 set_mem_align (ls, 128);
6815 result = gen_reg_rtx (EAmode);
6816 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6817 op = force_reg (Pmode, op);
6818 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6819 ls, const0_rtx, EAmode, 1);
6820 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6822 if (EAmode == SImode)
6823 emit_insn (gen_addsi3 (result, op, ls));
6824 else
6825 emit_insn (gen_adddi3 (result, op, ls));
6827 return result;
6830 else
6831 gcc_unreachable ();
6835 /* Count the total number of instructions in each pipe and return the
6836 maximum, which is used as the Minimum Iteration Interval (MII)
6837 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6838    -2 denotes instructions that can go in either pipe0 or pipe1.  */
6839 static int
6840 spu_sms_res_mii (struct ddg *g)
6842 int i;
6843 unsigned t[4] = {0, 0, 0, 0};
6845 for (i = 0; i < g->num_nodes; i++)
6847 rtx_insn *insn = g->nodes[i].insn;
6848 int p = get_pipe (insn) + 2;
6850 gcc_assert (p >= 0);
6851 gcc_assert (p < 4);
6853 t[p]++;
6854 if (dump_file && INSN_P (insn))
6855 fprintf (dump_file, "i%d %s %d %d\n",
6856 INSN_UID (insn),
6857 insn_data[INSN_CODE(insn)].name,
6858 p, t[p]);
6860 if (dump_file)
6861 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
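  /* The MII is at least the number of insns tied to one specific pipe, and
     at least half (rounded up) of all counted insns that need an issue slot
     in pipe0 or pipe1; insns with get_pipe () == -1 do not constrain it.  */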
6863 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
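/* Per-function initialization for RTL expansion: record the pointer
   alignment assumptions made for the hard frame pointer and for the first
   two pseudo registers.  */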
6867 void
6868 spu_init_expanders (void)
6870 if (cfun)
6872 rtx r0, r1;
6873       /* The hard frame pointer is only 128-bit aligned when
6874 frame_pointer_needed is true. We don't know that until we're
6875 expanding the prologue. */
6876 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6878 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6879 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6880 to be treated as aligned, so generate them here. */
6881 r0 = gen_reg_rtx (SImode);
6882 r1 = gen_reg_rtx (SImode);
6883 mark_reg_pointer (r0, 128);
6884 mark_reg_pointer (r1, 128);
6885 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6886 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
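/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  */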
6890 static scalar_int_mode
6891 spu_libgcc_cmp_return_mode (void)
6894 /* For SPU, word mode is TImode, so it is better to use SImode
6895 for compare returns. */
6896 return SImode;
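/* Implement TARGET_LIBGCC_SHIFT_COUNT_MODE.  */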
6899 static scalar_int_mode
6900 spu_libgcc_shift_count_mode (void)
6902 /* For SPU, word mode is TImode, so it is better to use SImode
6903 for shift counts. */
6904 return SImode;
6907 /* Implement targetm.section_type_flags. */
6908 static unsigned int
6909 spu_section_type_flags (tree decl, const char *name, int reloc)
6911 /* .toe needs to have type @nobits. */
6912 if (strcmp (name, ".toe") == 0)
6913 return SECTION_BSS;
6914 /* Don't load _ea into the current address space. */
6915 if (strcmp (name, "._ea") == 0)
6916 return SECTION_WRITE | SECTION_DEBUG;
6917 return default_section_type_flags (decl, name, reloc);
6920 /* Implement targetm.select_section. */
6921 static section *
6922 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6924 /* Variables and constants defined in the __ea address space
6925 go into a special section named "._ea". */
6926 if (TREE_TYPE (decl) != error_mark_node
6927 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6929 /* We might get called with string constants, but get_named_section
6930 doesn't like them as they are not DECLs. Also, we need to set
6931 flags in that case. */
6932 if (!DECL_P (decl))
6933 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6935 return get_named_section (decl, "._ea", reloc);
6938 return default_elf_select_section (decl, reloc, align);
6941 /* Implement targetm.unique_section. */
6942 static void
6943 spu_unique_section (tree decl, int reloc)
6945 /* We don't support unique section names in the __ea address
6946 space for now. */
6947 if (TREE_TYPE (decl) != error_mark_node
6948 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6949 return;
6951 default_unique_section (decl, reloc);
6954 /* Generate a constant or register which contains 2^SCALE. We assume
6955 the result is valid for MODE. Currently, MODE must be V4SFmode and
6956 SCALE must be SImode. */
6958 spu_gen_exp2 (machine_mode mode, rtx scale)
6960 gcc_assert (mode == V4SFmode);
6961 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6962 if (GET_CODE (scale) != CONST_INT)
6964 /* unsigned int exp = (127 + scale) << 23;
6965 __vector float m = (__vector float) spu_splats (exp); */
6966 rtx reg = force_reg (SImode, scale);
6967 rtx exp = gen_reg_rtx (SImode);
6968 rtx mul = gen_reg_rtx (mode);
6969 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6970 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6971 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6972 return mul;
6974 else
6976 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6977 unsigned char arr[16];
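      /* Build the splatted constant directly: each 32-bit element is the
	 IEEE single-precision encoding of 2**SCALE, i.e. (127 + SCALE) << 23
	 with a zero mantissa, stored here byte by byte (big-endian).  */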
6978 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6979 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6980 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6981 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6982 return array_to_constant (mode, arr);
6986 /* After reload, just change the convert into a move instruction
6987 or a dead instruction. */
6988 void
6989 spu_split_convert (rtx ops[])
6991 if (REGNO (ops[0]) == REGNO (ops[1]))
6992 emit_note (NOTE_INSN_DELETED);
6993 else
6995       /* Always use TImode, as this might help hard register copy propagation.  */
6996 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6997 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6998 emit_insn (gen_move_insn (op0, op1));
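/* Output code that calls _mcount to profile the current function.
   LABELNO is unused on SPU.  */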
7002 void
7003 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
7005 fprintf (file, "# profile\n");
7006 fprintf (file, "brsl $75, _mcount\n");
7009 /* Implement targetm.ref_may_alias_errno. */
7010 static bool
7011 spu_ref_may_alias_errno (ao_ref *ref)
7013 tree base = ao_ref_base (ref);
7015 /* With SPU newlib, errno is defined as something like
7016 _impure_data._errno
7017 The default implementation of this target macro does not
7018      recognize such expressions, so handle them specially here.  */
7020 if (TREE_CODE (base) == VAR_DECL
7021 && !TREE_STATIC (base)
7022 && DECL_EXTERNAL (base)
7023 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7024 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7025 "_impure_data") == 0
7026 /* _errno is the first member of _impure_data. */
7027 && ref->offset == 0)
7028 return true;
7030 return default_ref_may_alias_errno (ref);
7033 /* Output thunk to FILE that implements a C++ virtual function call (with
7034 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7035 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7036 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7037 relative to the resulting this pointer. */
7039 static void
7040 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7041 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7042 tree function)
7044 rtx op[8];
7046 /* Make sure unwind info is emitted for the thunk if needed. */
7047 final_start_function (emit_barrier (), file, 1);
7049 /* Operand 0 is the target function. */
7050 op[0] = XEXP (DECL_RTL (function), 0);
7052 /* Operand 1 is the 'this' pointer. */
7053 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7054 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7055 else
7056 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7058 /* Operands 2/3 are the low/high halfwords of delta. */
7059 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7060 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7062 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7063 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7064 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7066 /* Operands 6/7 are temporary registers. */
7067 op[6] = gen_rtx_REG (Pmode, 79);
7068 op[7] = gen_rtx_REG (Pmode, 78);
7070 /* Add DELTA to this pointer. */
7071 if (delta)
7073 if (delta >= -0x200 && delta < 0x200)
7074 output_asm_insn ("ai\t%1,%1,%2", op);
7075 else if (delta >= -0x8000 && delta < 0x8000)
7077 output_asm_insn ("il\t%6,%2", op);
7078 output_asm_insn ("a\t%1,%1,%6", op);
7080 else
7082 output_asm_insn ("ilhu\t%6,%3", op);
7083 output_asm_insn ("iohl\t%6,%2", op);
7084 output_asm_insn ("a\t%1,%1,%6", op);
7088 /* Perform vcall adjustment. */
7089 if (vcall_offset)
7091 output_asm_insn ("lqd\t%7,0(%1)", op);
7092 output_asm_insn ("rotqby\t%7,%7,%1", op);
7094 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7095 output_asm_insn ("ai\t%7,%7,%4", op);
7096 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7098 output_asm_insn ("il\t%6,%4", op);
7099 output_asm_insn ("a\t%7,%7,%6", op);
7101 else
7103 output_asm_insn ("ilhu\t%6,%5", op);
7104 output_asm_insn ("iohl\t%6,%4", op);
7105 output_asm_insn ("a\t%7,%7,%6", op);
7108 output_asm_insn ("lqd\t%6,0(%7)", op);
7109 output_asm_insn ("rotqby\t%6,%6,%7", op);
7110 output_asm_insn ("a\t%1,%1,%6", op);
7113 /* Jump to target. */
7114 output_asm_insn ("br\t%0", op);
7116 final_end_function ();
7119 /* Canonicalize a comparison from one we don't have to one we do have. */
7120 static void
7121 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7122 bool op0_preserve_value)
7124 if (!op0_preserve_value
7125 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7127 rtx tem = *op0;
7128 *op0 = *op1;
7129 *op1 = tem;
7130 *code = (int)swap_condition ((enum rtx_code)*code);
7134 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7135 to perform. MEM is the memory on which to operate. VAL is the second
7136 operand of the binary operator. BEFORE and AFTER are optional locations to
7137    return the value of MEM either before or after the operation.  */
7138 void
7139 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7140 rtx orig_before, rtx orig_after)
7142 machine_mode mode = GET_MODE (mem);
7143 rtx before = orig_before, after = orig_after;
7145 if (before == NULL_RTX)
7146 before = gen_reg_rtx (mode);
7148 emit_move_insn (before, mem);
7150 if (code == MULT) /* NAND operation */
7152 rtx x = expand_simple_binop (mode, AND, before, val,
7153 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7154 after = expand_simple_unop (mode, NOT, x, after, 1);
7156 else
7158 after = expand_simple_binop (mode, code, before, val,
7159 after, 1, OPTAB_LIB_WIDEN);
7162 emit_move_insn (mem, after);
7164 if (orig_after && after != orig_after)
7165 emit_move_insn (orig_after, after);
7168 /* Implement TARGET_MODES_TIEABLE_P. */
7170 static bool
7171 spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7173 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7174 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7177 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. GCC assumes that modes are
7178 in the lowpart of a register, which is only true for SPU. */
7180 static bool
7181 spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7183 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7184 || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7185 || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7188 /* Implement TARGET_TRULY_NOOP_TRUNCATION. */
7190 static bool
7191 spu_truly_noop_truncation (poly_uint64 outprec, poly_uint64 inprec)
7193 return inprec <= 32 && outprec <= inprec;
7196 /* Implement TARGET_STATIC_RTX_ALIGNMENT.
7198 Make all static objects 16-byte aligned. This allows us to assume
7199 they are also padded to 16 bytes, which means we can use a single
7200 load or store instruction to access them. */
7202 static HOST_WIDE_INT
7203 spu_static_rtx_alignment (machine_mode mode)
7205 return MAX (GET_MODE_ALIGNMENT (mode), 128);
7208 /* Implement TARGET_CONSTANT_ALIGNMENT.
7210 Make all static objects 16-byte aligned. This allows us to assume
7211 they are also padded to 16 bytes, which means we can use a single
7212 load or store instruction to access them. */
7214 static HOST_WIDE_INT
7215 spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7217 return MAX (align, 128);
7220 /* Table of machine attributes. */
7221 static const struct attribute_spec spu_attribute_table[] =
7223 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
7224 affects_type_identity, handler, exclude } */
7225 { "naked", 0, 0, true, false, false, false,
7226 spu_handle_fndecl_attribute, NULL },
7227 { "spu_vector", 0, 0, false, true, false, false,
7228 spu_handle_vector_attribute, NULL },
7229 { NULL, 0, 0, false, false, false, false, NULL, NULL }
7232 /* TARGET overrides. */
7234 #undef TARGET_LRA_P
7235 #define TARGET_LRA_P hook_bool_void_false
7237 #undef TARGET_ADDR_SPACE_POINTER_MODE
7238 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7240 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7241 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7243 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7244 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7245 spu_addr_space_legitimate_address_p
7247 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7248 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7250 #undef TARGET_ADDR_SPACE_SUBSET_P
7251 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7253 #undef TARGET_ADDR_SPACE_CONVERT
7254 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7256 #undef TARGET_INIT_BUILTINS
7257 #define TARGET_INIT_BUILTINS spu_init_builtins
7258 #undef TARGET_BUILTIN_DECL
7259 #define TARGET_BUILTIN_DECL spu_builtin_decl
7261 #undef TARGET_EXPAND_BUILTIN
7262 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7264 #undef TARGET_UNWIND_WORD_MODE
7265 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7267 #undef TARGET_LEGITIMIZE_ADDRESS
7268 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7270 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7271 and .quad for the debugger. When it is known that the assembler is fixed,
7272 these can be removed. */
7273 #undef TARGET_ASM_UNALIGNED_SI_OP
7274 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7276 #undef TARGET_ASM_ALIGNED_DI_OP
7277 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7279 /* The .8byte directive doesn't seem to work well for a 32 bit
7280 architecture. */
7281 #undef TARGET_ASM_UNALIGNED_DI_OP
7282 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7284 #undef TARGET_RTX_COSTS
7285 #define TARGET_RTX_COSTS spu_rtx_costs
7287 #undef TARGET_ADDRESS_COST
7288 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7290 #undef TARGET_SCHED_ISSUE_RATE
7291 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7293 #undef TARGET_SCHED_INIT_GLOBAL
7294 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7296 #undef TARGET_SCHED_INIT
7297 #define TARGET_SCHED_INIT spu_sched_init
7299 #undef TARGET_SCHED_VARIABLE_ISSUE
7300 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7302 #undef TARGET_SCHED_REORDER
7303 #define TARGET_SCHED_REORDER spu_sched_reorder
7305 #undef TARGET_SCHED_REORDER2
7306 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7308 #undef TARGET_SCHED_ADJUST_COST
7309 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7311 #undef TARGET_ATTRIBUTE_TABLE
7312 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7314 #undef TARGET_ASM_INTEGER
7315 #define TARGET_ASM_INTEGER spu_assemble_integer
7317 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7318 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7320 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7321 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7323 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7324 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7326 #undef TARGET_ASM_GLOBALIZE_LABEL
7327 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7329 #undef TARGET_PASS_BY_REFERENCE
7330 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7332 #undef TARGET_FUNCTION_ARG
7333 #define TARGET_FUNCTION_ARG spu_function_arg
7335 #undef TARGET_FUNCTION_ARG_ADVANCE
7336 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7338 #undef TARGET_FUNCTION_ARG_OFFSET
7339 #define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7341 #undef TARGET_FUNCTION_ARG_PADDING
7342 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7344 #undef TARGET_MUST_PASS_IN_STACK
7345 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7347 #undef TARGET_BUILD_BUILTIN_VA_LIST
7348 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7350 #undef TARGET_EXPAND_BUILTIN_VA_START
7351 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7353 #undef TARGET_SETUP_INCOMING_VARARGS
7354 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7356 #undef TARGET_MACHINE_DEPENDENT_REORG
7357 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7359 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7360 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7362 #undef TARGET_INIT_LIBFUNCS
7363 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7365 #undef TARGET_RETURN_IN_MEMORY
7366 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7368 #undef TARGET_ENCODE_SECTION_INFO
7369 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7371 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7372 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7374 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7375 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7377 #undef TARGET_VECTORIZE_INIT_COST
7378 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7380 #undef TARGET_VECTORIZE_ADD_STMT_COST
7381 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7383 #undef TARGET_VECTORIZE_FINISH_COST
7384 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7386 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7387 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7389 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7390 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7392 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7393 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7395 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7396 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7398 #undef TARGET_SCHED_SMS_RES_MII
7399 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7401 #undef TARGET_SECTION_TYPE_FLAGS
7402 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7404 #undef TARGET_ASM_SELECT_SECTION
7405 #define TARGET_ASM_SELECT_SECTION spu_select_section
7407 #undef TARGET_ASM_UNIQUE_SECTION
7408 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7410 #undef TARGET_LEGITIMATE_ADDRESS_P
7411 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7413 #undef TARGET_LEGITIMATE_CONSTANT_P
7414 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7416 #undef TARGET_TRAMPOLINE_INIT
7417 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7419 #undef TARGET_WARN_FUNC_RETURN
7420 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7422 #undef TARGET_OPTION_OVERRIDE
7423 #define TARGET_OPTION_OVERRIDE spu_option_override
7425 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7426 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7428 #undef TARGET_REF_MAY_ALIAS_ERRNO
7429 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7431 #undef TARGET_ASM_OUTPUT_MI_THUNK
7432 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7433 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7434 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7436 /* Variable tracking should be run after all optimizations which
7437    change the order of insns.  It also needs a valid CFG.  */
7438 #undef TARGET_DELAY_VARTRACK
7439 #define TARGET_DELAY_VARTRACK true
7441 #undef TARGET_CANONICALIZE_COMPARISON
7442 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7444 #undef TARGET_CAN_USE_DOLOOP_P
7445 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7447 #undef TARGET_MODES_TIEABLE_P
7448 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7450 #undef TARGET_HARD_REGNO_NREGS
7451 #define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7453 #undef TARGET_CAN_CHANGE_MODE_CLASS
7454 #define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7456 #undef TARGET_TRULY_NOOP_TRUNCATION
7457 #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7459 #undef TARGET_STATIC_RTX_ALIGNMENT
7460 #define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
7461 #undef TARGET_CONSTANT_ALIGNMENT
7462 #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7464 struct gcc_target targetm = TARGET_INITIALIZER;
7466 #include "gt-spu.h"