Add gen_(const_)vec_duplicate helpers
[official-gcc.git] / gcc / config / spu / spu.c
blob78252101ef57397976a674bc066285eb959a6452
1 /* Copyright (C) 2006-2017 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "backend.h"
21 #include "target.h"
22 #include "rtl.h"
23 #include "tree.h"
24 #include "gimple.h"
25 #include "cfghooks.h"
26 #include "cfgloop.h"
27 #include "df.h"
28 #include "memmodel.h"
29 #include "tm_p.h"
30 #include "stringpool.h"
31 #include "attribs.h"
32 #include "expmed.h"
33 #include "optabs.h"
34 #include "regs.h"
35 #include "emit-rtl.h"
36 #include "recog.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
39 #include "alias.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "explow.h"
45 #include "expr.h"
46 #include "output.h"
47 #include "cfgrtl.h"
48 #include "cfgbuild.h"
49 #include "langhooks.h"
50 #include "reload.h"
51 #include "sched-int.h"
52 #include "params.h"
53 #include "gimplify.h"
54 #include "tm-constrs.h"
55 #include "ddg.h"
56 #include "dumpfile.h"
57 #include "builtins.h"
58 #include "rtl-iter.h"
60 /* This file should be included last. */
61 #include "target-def.h"
63 /* Builtin types, data and prototypes. */
65 enum spu_builtin_type_index
67 SPU_BTI_END_OF_PARAMS,
69 /* We create new type nodes for these. */
70 SPU_BTI_V16QI,
71 SPU_BTI_V8HI,
72 SPU_BTI_V4SI,
73 SPU_BTI_V2DI,
74 SPU_BTI_V4SF,
75 SPU_BTI_V2DF,
76 SPU_BTI_UV16QI,
77 SPU_BTI_UV8HI,
78 SPU_BTI_UV4SI,
79 SPU_BTI_UV2DI,
81 /* A 16-byte type. (Implemented with V16QI_type_node) */
82 SPU_BTI_QUADWORD,
84 /* These all correspond to intSI_type_node */
85 SPU_BTI_7,
86 SPU_BTI_S7,
87 SPU_BTI_U7,
88 SPU_BTI_S10,
89 SPU_BTI_S10_4,
90 SPU_BTI_U14,
91 SPU_BTI_16,
92 SPU_BTI_S16,
93 SPU_BTI_S16_2,
94 SPU_BTI_U16,
95 SPU_BTI_U16_2,
96 SPU_BTI_U18,
98 /* These correspond to the standard types */
99 SPU_BTI_INTQI,
100 SPU_BTI_INTHI,
101 SPU_BTI_INTSI,
102 SPU_BTI_INTDI,
104 SPU_BTI_UINTQI,
105 SPU_BTI_UINTHI,
106 SPU_BTI_UINTSI,
107 SPU_BTI_UINTDI,
109 SPU_BTI_FLOAT,
110 SPU_BTI_DOUBLE,
112 SPU_BTI_VOID,
113 SPU_BTI_PTR,
115 SPU_BTI_MAX
118 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
119 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
120 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
121 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
122 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
123 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
124 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
125 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
126 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
127 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
129 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
131 struct spu_builtin_range
133 int low, high;
136 static struct spu_builtin_range spu_builtin_range[] = {
137 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
138 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
139 {0ll, 0x7fll}, /* SPU_BTI_U7 */
140 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
141 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
142 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
143 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
144 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
145 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
146 {0ll, 0xffffll}, /* SPU_BTI_U16 */
147 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
148 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
152 /* Target specific attribute specifications. */
153 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
155 /* Prototypes and external defs. */
156 static int get_pipe (rtx_insn *insn);
157 static int spu_naked_function_p (tree func);
158 static int mem_is_padded_component_ref (rtx x);
159 static void fix_range (const char *);
160 static rtx spu_expand_load (rtx, rtx, rtx, int);
162 /* Which instruction set architecture to use. */
163 int spu_arch;
164 /* Which cpu are we tuning for. */
165 int spu_tune;
167 /* The hardware requires 8 insns between a hint and the branch it
168 effects. This variable describes how many rtl instructions the
169 compiler needs to see before inserting a hint, and then the compiler
170 will insert enough nops to make it at least 8 insns. The default is
171 for the compiler to allow up to 2 nops be emitted. The nops are
172 inserted in pairs, so we round down. */
173 int spu_hint_dist = (8*4) - (2*4);
175 enum spu_immediate {
176 SPU_NONE,
177 SPU_IL,
178 SPU_ILA,
179 SPU_ILH,
180 SPU_ILHU,
181 SPU_ORI,
182 SPU_ORHI,
183 SPU_ORBI,
184 SPU_IOHL
186 enum immediate_class
188 IC_POOL, /* constant pool */
189 IC_IL1, /* one il* instruction */
190 IC_IL2, /* both ilhu and iohl instructions */
191 IC_IL1s, /* one il* instruction */
192 IC_IL2s, /* both ilhu and iohl instructions */
193 IC_FSMBI, /* the fsmbi instruction */
194 IC_CPAT, /* one of the c*d instructions */
195 IC_FSMBI2 /* fsmbi plus 1 other instruction */
198 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
199 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
200 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
201 static enum immediate_class classify_immediate (rtx op,
202 machine_mode mode);
204 /* Pointer mode for __ea references. */
205 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
208 /* Define the structure for the machine field in struct function. */
209 struct GTY(()) machine_function
211 /* Register to use for PIC accesses. */
212 rtx pic_reg;
215 /* How to allocate a 'struct machine_function'. */
216 static struct machine_function *
217 spu_init_machine_status (void)
219 return ggc_cleared_alloc<machine_function> ();
222 /* Implement TARGET_OPTION_OVERRIDE. */
223 static void
224 spu_option_override (void)
226 /* Set up function hooks. */
227 init_machine_status = spu_init_machine_status;
229 /* Small loops will be unpeeled at -O3. For SPU it is more important
230 to keep code small by default. */
231 if (!flag_unroll_loops && !flag_peel_loops)
232 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
233 global_options.x_param_values,
234 global_options_set.x_param_values);
236 flag_omit_frame_pointer = 1;
238 /* Functions must be 8 byte aligned so we correctly handle dual issue */
239 if (align_functions < 8)
240 align_functions = 8;
242 spu_hint_dist = 8*4 - spu_max_nops*4;
243 if (spu_hint_dist < 0)
244 spu_hint_dist = 0;
246 if (spu_fixed_range_string)
247 fix_range (spu_fixed_range_string);
249 /* Determine processor architectural level. */
250 if (spu_arch_string)
252 if (strcmp (&spu_arch_string[0], "cell") == 0)
253 spu_arch = PROCESSOR_CELL;
254 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
255 spu_arch = PROCESSOR_CELLEDP;
256 else
257 error ("bad value (%s) for -march= switch", spu_arch_string);
260 /* Determine processor to tune for. */
261 if (spu_tune_string)
263 if (strcmp (&spu_tune_string[0], "cell") == 0)
264 spu_tune = PROCESSOR_CELL;
265 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
266 spu_tune = PROCESSOR_CELLEDP;
267 else
268 error ("bad value (%s) for -mtune= switch", spu_tune_string);
271 /* Change defaults according to the processor architecture. */
272 if (spu_arch == PROCESSOR_CELLEDP)
274 /* If no command line option has been otherwise specified, change
275 the default to -mno-safe-hints on celledp -- only the original
276 Cell/B.E. processors require this workaround. */
277 if (!(target_flags_explicit & MASK_SAFE_HINTS))
278 target_flags &= ~MASK_SAFE_HINTS;
281 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
284 /* Implement TARGET_HARD_REGNO_NREGS. */
286 static unsigned int
287 spu_hard_regno_nregs (unsigned int, machine_mode mode)
289 return CEIL (GET_MODE_BITSIZE (mode), MAX_FIXED_MODE_SIZE);
292 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
293 struct attribute_spec.handler. */
295 /* True if MODE is valid for the target. By "valid", we mean able to
296 be manipulated in non-trivial ways. In particular, this means all
297 the arithmetic is supported. */
298 static bool
299 spu_scalar_mode_supported_p (scalar_mode mode)
301 switch (mode)
303 case E_QImode:
304 case E_HImode:
305 case E_SImode:
306 case E_SFmode:
307 case E_DImode:
308 case E_TImode:
309 case E_DFmode:
310 return true;
312 default:
313 return false;
317 /* Similarly for vector modes. "Supported" here is less strict. At
318 least some operations are supported; need to check optabs or builtins
319 for further details. */
320 static bool
321 spu_vector_mode_supported_p (machine_mode mode)
323 switch (mode)
325 case E_V16QImode:
326 case E_V8HImode:
327 case E_V4SImode:
328 case E_V2DImode:
329 case E_V4SFmode:
330 case E_V2DFmode:
331 return true;
333 default:
334 return false;
338 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
339 least significant bytes of the outer mode. This function returns
340 TRUE for the SUBREG's where this is correct. */
342 valid_subreg (rtx op)
344 machine_mode om = GET_MODE (op);
345 machine_mode im = GET_MODE (SUBREG_REG (op));
346 return om != VOIDmode && im != VOIDmode
347 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
348 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
349 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
352 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
353 and adjust the start offset. */
354 static rtx
355 adjust_operand (rtx op, HOST_WIDE_INT * start)
357 machine_mode mode;
358 int op_size;
359 /* Strip any paradoxical SUBREG. */
360 if (GET_CODE (op) == SUBREG
361 && (GET_MODE_BITSIZE (GET_MODE (op))
362 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
364 if (start)
365 *start -=
366 GET_MODE_BITSIZE (GET_MODE (op)) -
367 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
368 op = SUBREG_REG (op);
370 /* If it is smaller than SI, assure a SUBREG */
371 op_size = GET_MODE_BITSIZE (GET_MODE (op));
372 if (op_size < 32)
374 if (start)
375 *start += 32 - op_size;
376 op_size = 32;
378 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
379 mode = int_mode_for_size (op_size, 0).require ();
380 if (mode != GET_MODE (op))
381 op = gen_rtx_SUBREG (mode, op, 0);
382 return op;
385 void
386 spu_expand_extv (rtx ops[], int unsignedp)
388 rtx dst = ops[0], src = ops[1];
389 HOST_WIDE_INT width = INTVAL (ops[2]);
390 HOST_WIDE_INT start = INTVAL (ops[3]);
391 HOST_WIDE_INT align_mask;
392 rtx s0, s1, mask, r0;
394 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
396 if (MEM_P (src))
398 /* First, determine if we need 1 TImode load or 2. We need only 1
399 if the bits being extracted do not cross the alignment boundary
400 as determined by the MEM and its address. */
402 align_mask = -MEM_ALIGN (src);
403 if ((start & align_mask) == ((start + width - 1) & align_mask))
405 /* Alignment is sufficient for 1 load. */
406 s0 = gen_reg_rtx (TImode);
407 r0 = spu_expand_load (s0, 0, src, start / 8);
408 start &= 7;
409 if (r0)
410 emit_insn (gen_rotqby_ti (s0, s0, r0));
412 else
414 /* Need 2 loads. */
415 s0 = gen_reg_rtx (TImode);
416 s1 = gen_reg_rtx (TImode);
417 r0 = spu_expand_load (s0, s1, src, start / 8);
418 start &= 7;
420 gcc_assert (start + width <= 128);
421 if (r0)
423 rtx r1 = gen_reg_rtx (SImode);
424 mask = gen_reg_rtx (TImode);
425 emit_move_insn (mask, GEN_INT (-1));
426 emit_insn (gen_rotqby_ti (s0, s0, r0));
427 emit_insn (gen_rotqby_ti (s1, s1, r0));
428 if (GET_CODE (r0) == CONST_INT)
429 r1 = GEN_INT (INTVAL (r0) & 15);
430 else
431 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
432 emit_insn (gen_shlqby_ti (mask, mask, r1));
433 emit_insn (gen_selb (s0, s1, s0, mask));
438 else if (GET_CODE (src) == SUBREG)
440 rtx r = SUBREG_REG (src);
441 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
442 s0 = gen_reg_rtx (TImode);
443 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
444 emit_insn (gen_rtx_SET (s0, gen_rtx_ZERO_EXTEND (TImode, r)));
445 else
446 emit_move_insn (s0, src);
448 else
450 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
451 s0 = gen_reg_rtx (TImode);
452 emit_move_insn (s0, src);
455 /* Now s0 is TImode and contains the bits to extract at start. */
457 if (start)
458 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
460 if (128 - width)
461 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);
463 emit_move_insn (dst, s0);
466 void
467 spu_expand_insv (rtx ops[])
469 HOST_WIDE_INT width = INTVAL (ops[1]);
470 HOST_WIDE_INT start = INTVAL (ops[2]);
471 unsigned HOST_WIDE_INT maskbits;
472 machine_mode dst_mode;
473 rtx dst = ops[0], src = ops[3];
474 int dst_size;
475 rtx mask;
476 rtx shift_reg;
477 int shift;
480 if (GET_CODE (ops[0]) == MEM)
481 dst = gen_reg_rtx (TImode);
482 else
483 dst = adjust_operand (dst, &start);
484 dst_mode = GET_MODE (dst);
485 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
487 if (CONSTANT_P (src))
489 machine_mode m =
490 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
491 src = force_reg (m, convert_to_mode (m, src, 0));
493 src = adjust_operand (src, 0);
495 mask = gen_reg_rtx (dst_mode);
496 shift_reg = gen_reg_rtx (dst_mode);
497 shift = dst_size - start - width;
499 /* It's not safe to use subreg here because the compiler assumes
500 that the SUBREG_REG is right justified in the SUBREG. */
501 convert_move (shift_reg, src, 1);
503 if (shift > 0)
505 switch (dst_mode)
507 case E_SImode:
508 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
509 break;
510 case E_DImode:
511 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
512 break;
513 case E_TImode:
514 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
515 break;
516 default:
517 abort ();
520 else if (shift < 0)
521 abort ();
523 switch (dst_size)
525 case 32:
526 maskbits = (~(unsigned HOST_WIDE_INT)0 << (32 - width - start));
527 if (start)
528 maskbits += ((unsigned HOST_WIDE_INT)1 << (32 - start));
529 emit_move_insn (mask, GEN_INT (maskbits));
530 break;
531 case 64:
532 maskbits = (~(unsigned HOST_WIDE_INT)0 << (64 - width - start));
533 if (start)
534 maskbits += ((unsigned HOST_WIDE_INT)1 << (64 - start));
535 emit_move_insn (mask, GEN_INT (maskbits));
536 break;
537 case 128:
539 unsigned char arr[16];
540 int i = start / 8;
541 memset (arr, 0, sizeof (arr));
542 arr[i] = 0xff >> (start & 7);
543 for (i++; i <= (start + width - 1) / 8; i++)
544 arr[i] = 0xff;
545 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
546 emit_move_insn (mask, array_to_constant (TImode, arr));
548 break;
549 default:
550 abort ();
552 if (GET_CODE (ops[0]) == MEM)
554 rtx low = gen_reg_rtx (SImode);
555 rtx rotl = gen_reg_rtx (SImode);
556 rtx mask0 = gen_reg_rtx (TImode);
557 rtx addr;
558 rtx addr0;
559 rtx addr1;
560 rtx mem;
562 addr = force_reg (Pmode, XEXP (ops[0], 0));
563 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
564 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
565 emit_insn (gen_negsi2 (rotl, low));
566 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
567 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
568 mem = change_address (ops[0], TImode, addr0);
569 set_mem_alias_set (mem, 0);
570 emit_move_insn (dst, mem);
571 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
572 if (start + width > MEM_ALIGN (ops[0]))
574 rtx shl = gen_reg_rtx (SImode);
575 rtx mask1 = gen_reg_rtx (TImode);
576 rtx dst1 = gen_reg_rtx (TImode);
577 rtx mem1;
578 addr1 = plus_constant (Pmode, addr, 16);
579 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
580 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
581 emit_insn (gen_shlqby_ti (mask1, mask, shl));
582 mem1 = change_address (ops[0], TImode, addr1);
583 set_mem_alias_set (mem1, 0);
584 emit_move_insn (dst1, mem1);
585 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
586 emit_move_insn (mem1, dst1);
588 emit_move_insn (mem, dst);
590 else
591 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
596 spu_expand_block_move (rtx ops[])
598 HOST_WIDE_INT bytes, align, offset;
599 rtx src, dst, sreg, dreg, target;
600 int i;
601 if (GET_CODE (ops[2]) != CONST_INT
602 || GET_CODE (ops[3]) != CONST_INT
603 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
604 return 0;
606 bytes = INTVAL (ops[2]);
607 align = INTVAL (ops[3]);
609 if (bytes <= 0)
610 return 1;
612 dst = ops[0];
613 src = ops[1];
615 if (align == 16)
617 for (offset = 0; offset + 16 <= bytes; offset += 16)
619 dst = adjust_address (ops[0], V16QImode, offset);
620 src = adjust_address (ops[1], V16QImode, offset);
621 emit_move_insn (dst, src);
623 if (offset < bytes)
625 rtx mask;
626 unsigned char arr[16] = { 0 };
627 for (i = 0; i < bytes - offset; i++)
628 arr[i] = 0xff;
629 dst = adjust_address (ops[0], V16QImode, offset);
630 src = adjust_address (ops[1], V16QImode, offset);
631 mask = gen_reg_rtx (V16QImode);
632 sreg = gen_reg_rtx (V16QImode);
633 dreg = gen_reg_rtx (V16QImode);
634 target = gen_reg_rtx (V16QImode);
635 emit_move_insn (mask, array_to_constant (V16QImode, arr));
636 emit_move_insn (dreg, dst);
637 emit_move_insn (sreg, src);
638 emit_insn (gen_selb (target, dreg, sreg, mask));
639 emit_move_insn (dst, target);
641 return 1;
643 return 0;
646 enum spu_comp_code
647 { SPU_EQ, SPU_GT, SPU_GTU };
649 int spu_comp_icode[12][3] = {
650 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
651 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
652 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
653 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
654 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
655 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
656 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
657 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
658 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
659 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
660 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
661 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
664 /* Generate a compare for CODE. Return a brand-new rtx that represents
665 the result of the compare. GCC can figure this out too if we don't
666 provide all variations of compares, but GCC always wants to use
667 WORD_MODE, we can generate better code in most cases if we do it
668 ourselves. */
669 void
670 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
672 int reverse_compare = 0;
673 int reverse_test = 0;
674 rtx compare_result, eq_result;
675 rtx comp_rtx, eq_rtx;
676 machine_mode comp_mode;
677 machine_mode op_mode;
678 enum spu_comp_code scode, eq_code;
679 enum insn_code ior_code;
680 enum rtx_code code = GET_CODE (cmp);
681 rtx op0 = XEXP (cmp, 0);
682 rtx op1 = XEXP (cmp, 1);
683 int index;
684 int eq_test = 0;
686 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
687 and so on, to keep the constant in operand 1. */
688 if (GET_CODE (op1) == CONST_INT)
690 HOST_WIDE_INT val = INTVAL (op1) - 1;
691 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
692 switch (code)
694 case GE:
695 op1 = GEN_INT (val);
696 code = GT;
697 break;
698 case LT:
699 op1 = GEN_INT (val);
700 code = LE;
701 break;
702 case GEU:
703 op1 = GEN_INT (val);
704 code = GTU;
705 break;
706 case LTU:
707 op1 = GEN_INT (val);
708 code = LEU;
709 break;
710 default:
711 break;
715 /* However, if we generate an integer result, performing a reverse test
716 would require an extra negation, so avoid that where possible. */
717 if (GET_CODE (op1) == CONST_INT && is_set == 1)
719 HOST_WIDE_INT val = INTVAL (op1) + 1;
720 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
721 switch (code)
723 case LE:
724 op1 = GEN_INT (val);
725 code = LT;
726 break;
727 case LEU:
728 op1 = GEN_INT (val);
729 code = LTU;
730 break;
731 default:
732 break;
736 comp_mode = SImode;
737 op_mode = GET_MODE (op0);
739 switch (code)
741 case GE:
742 scode = SPU_GT;
743 if (HONOR_NANS (op_mode))
745 reverse_compare = 0;
746 reverse_test = 0;
747 eq_test = 1;
748 eq_code = SPU_EQ;
750 else
752 reverse_compare = 1;
753 reverse_test = 1;
755 break;
756 case LE:
757 scode = SPU_GT;
758 if (HONOR_NANS (op_mode))
760 reverse_compare = 1;
761 reverse_test = 0;
762 eq_test = 1;
763 eq_code = SPU_EQ;
765 else
767 reverse_compare = 0;
768 reverse_test = 1;
770 break;
771 case LT:
772 reverse_compare = 1;
773 reverse_test = 0;
774 scode = SPU_GT;
775 break;
776 case GEU:
777 reverse_compare = 1;
778 reverse_test = 1;
779 scode = SPU_GTU;
780 break;
781 case LEU:
782 reverse_compare = 0;
783 reverse_test = 1;
784 scode = SPU_GTU;
785 break;
786 case LTU:
787 reverse_compare = 1;
788 reverse_test = 0;
789 scode = SPU_GTU;
790 break;
791 case NE:
792 reverse_compare = 0;
793 reverse_test = 1;
794 scode = SPU_EQ;
795 break;
797 case EQ:
798 scode = SPU_EQ;
799 break;
800 case GT:
801 scode = SPU_GT;
802 break;
803 case GTU:
804 scode = SPU_GTU;
805 break;
806 default:
807 scode = SPU_EQ;
808 break;
811 switch (op_mode)
813 case E_QImode:
814 index = 0;
815 comp_mode = QImode;
816 break;
817 case E_HImode:
818 index = 1;
819 comp_mode = HImode;
820 break;
821 case E_SImode:
822 index = 2;
823 break;
824 case E_DImode:
825 index = 3;
826 break;
827 case E_TImode:
828 index = 4;
829 break;
830 case E_SFmode:
831 index = 5;
832 break;
833 case E_DFmode:
834 index = 6;
835 break;
836 case E_V16QImode:
837 index = 7;
838 comp_mode = op_mode;
839 break;
840 case E_V8HImode:
841 index = 8;
842 comp_mode = op_mode;
843 break;
844 case E_V4SImode:
845 index = 9;
846 comp_mode = op_mode;
847 break;
848 case E_V4SFmode:
849 index = 10;
850 comp_mode = V4SImode;
851 break;
852 case E_V2DFmode:
853 index = 11;
854 comp_mode = V2DImode;
855 break;
856 case E_V2DImode:
857 default:
858 abort ();
861 if (GET_MODE (op1) == DFmode
862 && (scode != SPU_GT && scode != SPU_EQ))
863 abort ();
865 if (is_set == 0 && op1 == const0_rtx
866 && (GET_MODE (op0) == SImode
867 || GET_MODE (op0) == HImode
868 || GET_MODE (op0) == QImode) && scode == SPU_EQ)
870 /* Don't need to set a register with the result when we are
871 comparing against zero and branching. */
872 reverse_test = !reverse_test;
873 compare_result = op0;
875 else
877 compare_result = gen_reg_rtx (comp_mode);
879 if (reverse_compare)
881 rtx t = op1;
882 op1 = op0;
883 op0 = t;
886 if (spu_comp_icode[index][scode] == 0)
887 abort ();
889 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
890 (op0, op_mode))
891 op0 = force_reg (op_mode, op0);
892 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
893 (op1, op_mode))
894 op1 = force_reg (op_mode, op1);
895 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
896 op0, op1);
897 if (comp_rtx == 0)
898 abort ();
899 emit_insn (comp_rtx);
901 if (eq_test)
903 eq_result = gen_reg_rtx (comp_mode);
904 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
905 op0, op1);
906 if (eq_rtx == 0)
907 abort ();
908 emit_insn (eq_rtx);
909 ior_code = optab_handler (ior_optab, comp_mode);
910 gcc_assert (ior_code != CODE_FOR_nothing);
911 emit_insn (GEN_FCN (ior_code)
912 (compare_result, compare_result, eq_result));
916 if (is_set == 0)
918 rtx bcomp;
919 rtx loc_ref;
921 /* We don't have branch on QI compare insns, so we convert the
922 QI compare result to a HI result. */
923 if (comp_mode == QImode)
925 rtx old_res = compare_result;
926 compare_result = gen_reg_rtx (HImode);
927 comp_mode = HImode;
928 emit_insn (gen_extendqihi2 (compare_result, old_res));
931 if (reverse_test)
932 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
933 else
934 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
936 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
937 emit_jump_insn (gen_rtx_SET (pc_rtx,
938 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
939 loc_ref, pc_rtx)));
941 else if (is_set == 2)
943 rtx target = operands[0];
944 int compare_size = GET_MODE_BITSIZE (comp_mode);
945 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
946 machine_mode mode = int_mode_for_size (target_size, 0).require ();
947 rtx select_mask;
948 rtx op_t = operands[2];
949 rtx op_f = operands[3];
951 /* The result of the comparison can be SI, HI or QI mode. Create a
952 mask based on that result. */
953 if (target_size > compare_size)
955 select_mask = gen_reg_rtx (mode);
956 emit_insn (gen_extend_compare (select_mask, compare_result));
958 else if (target_size < compare_size)
959 select_mask =
960 gen_rtx_SUBREG (mode, compare_result,
961 (compare_size - target_size) / BITS_PER_UNIT);
962 else if (comp_mode != mode)
963 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
964 else
965 select_mask = compare_result;
967 if (GET_MODE (target) != GET_MODE (op_t)
968 || GET_MODE (target) != GET_MODE (op_f))
969 abort ();
971 if (reverse_test)
972 emit_insn (gen_selb (target, op_t, op_f, select_mask));
973 else
974 emit_insn (gen_selb (target, op_f, op_t, select_mask));
976 else
978 rtx target = operands[0];
979 if (reverse_test)
980 emit_insn (gen_rtx_SET (compare_result,
981 gen_rtx_NOT (comp_mode, compare_result)));
982 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
983 emit_insn (gen_extendhisi2 (target, compare_result));
984 else if (GET_MODE (target) == SImode
985 && GET_MODE (compare_result) == QImode)
986 emit_insn (gen_extend_compare (target, compare_result));
987 else
988 emit_move_insn (target, compare_result);
992 HOST_WIDE_INT
993 const_double_to_hwint (rtx x)
995 HOST_WIDE_INT val;
996 if (GET_MODE (x) == SFmode)
997 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x), val);
998 else if (GET_MODE (x) == DFmode)
1000 long l[2];
1001 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x), l);
1002 val = l[0];
1003 val = (val << 32) | (l[1] & 0xffffffff);
1005 else
1006 abort ();
1007 return val;
1011 hwint_to_const_double (machine_mode mode, HOST_WIDE_INT v)
1013 long tv[2];
1014 REAL_VALUE_TYPE rv;
1015 gcc_assert (mode == SFmode || mode == DFmode);
1017 if (mode == SFmode)
1018 tv[0] = (v << 32) >> 32;
1019 else if (mode == DFmode)
1021 tv[1] = (v << 32) >> 32;
1022 tv[0] = v >> 32;
1024 real_from_target (&rv, tv, mode);
1025 return const_double_from_real_value (rv, mode);
1028 void
1029 print_operand_address (FILE * file, register rtx addr)
1031 rtx reg;
1032 rtx offset;
1034 if (GET_CODE (addr) == AND
1035 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1036 && INTVAL (XEXP (addr, 1)) == -16)
1037 addr = XEXP (addr, 0);
1039 switch (GET_CODE (addr))
1041 case REG:
1042 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1043 break;
1045 case PLUS:
1046 reg = XEXP (addr, 0);
1047 offset = XEXP (addr, 1);
1048 if (GET_CODE (offset) == REG)
1050 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1051 reg_names[REGNO (offset)]);
1053 else if (GET_CODE (offset) == CONST_INT)
1055 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1056 INTVAL (offset), reg_names[REGNO (reg)]);
1058 else
1059 abort ();
1060 break;
1062 case CONST:
1063 case LABEL_REF:
1064 case SYMBOL_REF:
1065 case CONST_INT:
1066 output_addr_const (file, addr);
1067 break;
1069 default:
1070 debug_rtx (addr);
1071 abort ();
1075 void
1076 print_operand (FILE * file, rtx x, int code)
1078 machine_mode mode = GET_MODE (x);
1079 HOST_WIDE_INT val;
1080 unsigned char arr[16];
1081 int xcode = GET_CODE (x);
1082 int i, info;
1083 if (GET_MODE (x) == VOIDmode)
1084 switch (code)
1086 case 'L': /* 128 bits, signed */
1087 case 'm': /* 128 bits, signed */
1088 case 'T': /* 128 bits, signed */
1089 case 't': /* 128 bits, signed */
1090 mode = TImode;
1091 break;
1092 case 'K': /* 64 bits, signed */
1093 case 'k': /* 64 bits, signed */
1094 case 'D': /* 64 bits, signed */
1095 case 'd': /* 64 bits, signed */
1096 mode = DImode;
1097 break;
1098 case 'J': /* 32 bits, signed */
1099 case 'j': /* 32 bits, signed */
1100 case 's': /* 32 bits, signed */
1101 case 'S': /* 32 bits, signed */
1102 mode = SImode;
1103 break;
1105 switch (code)
1108 case 'j': /* 32 bits, signed */
1109 case 'k': /* 64 bits, signed */
1110 case 'm': /* 128 bits, signed */
1111 if (xcode == CONST_INT
1112 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1114 gcc_assert (logical_immediate_p (x, mode));
1115 constant_to_array (mode, x, arr);
1116 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1117 val = trunc_int_for_mode (val, SImode);
1118 switch (which_logical_immediate (val))
1120 case SPU_ORI:
1121 break;
1122 case SPU_ORHI:
1123 fprintf (file, "h");
1124 break;
1125 case SPU_ORBI:
1126 fprintf (file, "b");
1127 break;
1128 default:
1129 gcc_unreachable();
1132 else
1133 gcc_unreachable();
1134 return;
1136 case 'J': /* 32 bits, signed */
1137 case 'K': /* 64 bits, signed */
1138 case 'L': /* 128 bits, signed */
1139 if (xcode == CONST_INT
1140 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1142 gcc_assert (logical_immediate_p (x, mode)
1143 || iohl_immediate_p (x, mode));
1144 constant_to_array (mode, x, arr);
1145 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1146 val = trunc_int_for_mode (val, SImode);
1147 switch (which_logical_immediate (val))
1149 case SPU_ORI:
1150 case SPU_IOHL:
1151 break;
1152 case SPU_ORHI:
1153 val = trunc_int_for_mode (val, HImode);
1154 break;
1155 case SPU_ORBI:
1156 val = trunc_int_for_mode (val, QImode);
1157 break;
1158 default:
1159 gcc_unreachable();
1161 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1163 else
1164 gcc_unreachable();
1165 return;
1167 case 't': /* 128 bits, signed */
1168 case 'd': /* 64 bits, signed */
1169 case 's': /* 32 bits, signed */
1170 if (CONSTANT_P (x))
1172 enum immediate_class c = classify_immediate (x, mode);
1173 switch (c)
1175 case IC_IL1:
1176 constant_to_array (mode, x, arr);
1177 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1178 val = trunc_int_for_mode (val, SImode);
1179 switch (which_immediate_load (val))
1181 case SPU_IL:
1182 break;
1183 case SPU_ILA:
1184 fprintf (file, "a");
1185 break;
1186 case SPU_ILH:
1187 fprintf (file, "h");
1188 break;
1189 case SPU_ILHU:
1190 fprintf (file, "hu");
1191 break;
1192 default:
1193 gcc_unreachable ();
1195 break;
1196 case IC_CPAT:
1197 constant_to_array (mode, x, arr);
1198 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1199 if (info == 1)
1200 fprintf (file, "b");
1201 else if (info == 2)
1202 fprintf (file, "h");
1203 else if (info == 4)
1204 fprintf (file, "w");
1205 else if (info == 8)
1206 fprintf (file, "d");
1207 break;
1208 case IC_IL1s:
1209 if (xcode == CONST_VECTOR)
1211 x = CONST_VECTOR_ELT (x, 0);
1212 xcode = GET_CODE (x);
1214 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1215 fprintf (file, "a");
1216 else if (xcode == HIGH)
1217 fprintf (file, "hu");
1218 break;
1219 case IC_FSMBI:
1220 case IC_FSMBI2:
1221 case IC_IL2:
1222 case IC_IL2s:
1223 case IC_POOL:
1224 abort ();
1227 else
1228 gcc_unreachable ();
1229 return;
1231 case 'T': /* 128 bits, signed */
1232 case 'D': /* 64 bits, signed */
1233 case 'S': /* 32 bits, signed */
1234 if (CONSTANT_P (x))
1236 enum immediate_class c = classify_immediate (x, mode);
1237 switch (c)
1239 case IC_IL1:
1240 constant_to_array (mode, x, arr);
1241 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1242 val = trunc_int_for_mode (val, SImode);
1243 switch (which_immediate_load (val))
1245 case SPU_IL:
1246 case SPU_ILA:
1247 break;
1248 case SPU_ILH:
1249 case SPU_ILHU:
1250 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1251 break;
1252 default:
1253 gcc_unreachable ();
1255 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1256 break;
1257 case IC_FSMBI:
1258 constant_to_array (mode, x, arr);
1259 val = 0;
1260 for (i = 0; i < 16; i++)
1262 val <<= 1;
1263 val |= arr[i] & 1;
1265 print_operand (file, GEN_INT (val), 0);
1266 break;
1267 case IC_CPAT:
1268 constant_to_array (mode, x, arr);
1269 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1270 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1271 break;
1272 case IC_IL1s:
1273 if (xcode == HIGH)
1274 x = XEXP (x, 0);
1275 if (GET_CODE (x) == CONST_VECTOR)
1276 x = CONST_VECTOR_ELT (x, 0);
1277 output_addr_const (file, x);
1278 if (xcode == HIGH)
1279 fprintf (file, "@h");
1280 break;
1281 case IC_IL2:
1282 case IC_IL2s:
1283 case IC_FSMBI2:
1284 case IC_POOL:
1285 abort ();
1288 else
1289 gcc_unreachable ();
1290 return;
1292 case 'C':
1293 if (xcode == CONST_INT)
1295 /* Only 4 least significant bits are relevant for generate
1296 control word instructions. */
1297 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1298 return;
1300 break;
1302 case 'M': /* print code for c*d */
1303 if (GET_CODE (x) == CONST_INT)
1304 switch (INTVAL (x))
1306 case 1:
1307 fprintf (file, "b");
1308 break;
1309 case 2:
1310 fprintf (file, "h");
1311 break;
1312 case 4:
1313 fprintf (file, "w");
1314 break;
1315 case 8:
1316 fprintf (file, "d");
1317 break;
1318 default:
1319 gcc_unreachable();
1321 else
1322 gcc_unreachable();
1323 return;
1325 case 'N': /* Negate the operand */
1326 if (xcode == CONST_INT)
1327 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1328 else if (xcode == CONST_VECTOR)
1329 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1330 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1331 return;
1333 case 'I': /* enable/disable interrupts */
1334 if (xcode == CONST_INT)
1335 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1336 return;
1338 case 'b': /* branch modifiers */
1339 if (xcode == REG)
1340 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1341 else if (COMPARISON_P (x))
1342 fprintf (file, "%s", xcode == NE ? "n" : "");
1343 return;
1345 case 'i': /* indirect call */
1346 if (xcode == MEM)
1348 if (GET_CODE (XEXP (x, 0)) == REG)
1349 /* Used in indirect function calls. */
1350 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1351 else
1352 output_address (GET_MODE (x), XEXP (x, 0));
1354 return;
1356 case 'p': /* load/store */
1357 if (xcode == MEM)
1359 x = XEXP (x, 0);
1360 xcode = GET_CODE (x);
1362 if (xcode == AND)
1364 x = XEXP (x, 0);
1365 xcode = GET_CODE (x);
1367 if (xcode == REG)
1368 fprintf (file, "d");
1369 else if (xcode == CONST_INT)
1370 fprintf (file, "a");
1371 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1372 fprintf (file, "r");
1373 else if (xcode == PLUS || xcode == LO_SUM)
1375 if (GET_CODE (XEXP (x, 1)) == REG)
1376 fprintf (file, "x");
1377 else
1378 fprintf (file, "d");
1380 return;
1382 case 'e':
1383 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1384 val &= 0x7;
1385 output_addr_const (file, GEN_INT (val));
1386 return;
1388 case 'f':
1389 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1390 val &= 0x1f;
1391 output_addr_const (file, GEN_INT (val));
1392 return;
1394 case 'g':
1395 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1396 val &= 0x3f;
1397 output_addr_const (file, GEN_INT (val));
1398 return;
1400 case 'h':
1401 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1402 val = (val >> 3) & 0x1f;
1403 output_addr_const (file, GEN_INT (val));
1404 return;
1406 case 'E':
1407 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1408 val = -val;
1409 val &= 0x7;
1410 output_addr_const (file, GEN_INT (val));
1411 return;
1413 case 'F':
1414 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1415 val = -val;
1416 val &= 0x1f;
1417 output_addr_const (file, GEN_INT (val));
1418 return;
1420 case 'G':
1421 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1422 val = -val;
1423 val &= 0x3f;
1424 output_addr_const (file, GEN_INT (val));
1425 return;
1427 case 'H':
1428 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1429 val = -(val & -8ll);
1430 val = (val >> 3) & 0x1f;
1431 output_addr_const (file, GEN_INT (val));
1432 return;
1434 case 'v':
1435 case 'w':
1436 constant_to_array (mode, x, arr);
1437 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1438 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1439 return;
1441 case 0:
1442 if (xcode == REG)
1443 fprintf (file, "%s", reg_names[REGNO (x)]);
1444 else if (xcode == MEM)
1445 output_address (GET_MODE (x), XEXP (x, 0));
1446 else if (xcode == CONST_VECTOR)
1447 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1448 else
1449 output_addr_const (file, x);
1450 return;
1452 /* unused letters
1453 o qr u yz
1454 AB OPQR UVWXYZ */
1455 default:
1456 output_operand_lossage ("invalid %%xn code");
1458 gcc_unreachable ();
1461 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1462 caller saved register. For leaf functions it is more efficient to
1463 use a volatile register because we won't need to save and restore the
1464 pic register. This routine is only valid after register allocation
1465 is completed, so we can pick an unused register. */
1466 static rtx
1467 get_pic_reg (void)
1469 if (!reload_completed && !reload_in_progress)
1470 abort ();
1472 /* If we've already made the decision, we need to keep with it. Once we've
1473 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1474 return true since the register is now live; this should not cause us to
1475 "switch back" to using pic_offset_table_rtx. */
1476 if (!cfun->machine->pic_reg)
1478 if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1479 cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1480 else
1481 cfun->machine->pic_reg = pic_offset_table_rtx;
1484 return cfun->machine->pic_reg;
1487 /* Split constant addresses to handle cases that are too large.
1488 Add in the pic register when in PIC mode.
1489 Split immediates that require more than 1 instruction. */
1491 spu_split_immediate (rtx * ops)
1493 machine_mode mode = GET_MODE (ops[0]);
1494 enum immediate_class c = classify_immediate (ops[1], mode);
1496 switch (c)
1498 case IC_IL2:
1500 unsigned char arrhi[16];
1501 unsigned char arrlo[16];
1502 rtx to, temp, hi, lo;
1503 int i;
1504 /* We need to do reals as ints because the constant used in the
1505 IOR might not be a legitimate real constant. */
1506 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1507 constant_to_array (mode, ops[1], arrhi);
1508 if (imode != mode)
1509 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1510 else
1511 to = ops[0];
1512 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1513 for (i = 0; i < 16; i += 4)
1515 arrlo[i + 2] = arrhi[i + 2];
1516 arrlo[i + 3] = arrhi[i + 3];
1517 arrlo[i + 0] = arrlo[i + 1] = 0;
1518 arrhi[i + 2] = arrhi[i + 3] = 0;
1520 hi = array_to_constant (imode, arrhi);
1521 lo = array_to_constant (imode, arrlo);
1522 emit_move_insn (temp, hi);
1523 emit_insn (gen_rtx_SET (to, gen_rtx_IOR (imode, temp, lo)));
1524 return 1;
1526 case IC_FSMBI2:
1528 unsigned char arr_fsmbi[16];
1529 unsigned char arr_andbi[16];
1530 rtx to, reg_fsmbi, reg_and;
1531 int i;
1532 /* We need to do reals as ints because the constant used in the
1533 * AND might not be a legitimate real constant. */
1534 scalar_int_mode imode = int_mode_for_mode (mode).require ();
1535 constant_to_array (mode, ops[1], arr_fsmbi);
1536 if (imode != mode)
1537 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1538 else
1539 to = ops[0];
1540 for (i = 0; i < 16; i++)
1541 if (arr_fsmbi[i] != 0)
1543 arr_andbi[0] = arr_fsmbi[i];
1544 arr_fsmbi[i] = 0xff;
1546 for (i = 1; i < 16; i++)
1547 arr_andbi[i] = arr_andbi[0];
1548 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1549 reg_and = array_to_constant (imode, arr_andbi);
1550 emit_move_insn (to, reg_fsmbi);
1551 emit_insn (gen_rtx_SET (to, gen_rtx_AND (imode, to, reg_and)));
1552 return 1;
1554 case IC_POOL:
1555 if (reload_in_progress || reload_completed)
1557 rtx mem = force_const_mem (mode, ops[1]);
1558 if (TARGET_LARGE_MEM)
1560 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1561 emit_move_insn (addr, XEXP (mem, 0));
1562 mem = replace_equiv_address (mem, addr);
1564 emit_move_insn (ops[0], mem);
1565 return 1;
1567 break;
1568 case IC_IL1s:
1569 case IC_IL2s:
1570 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1572 if (c == IC_IL2s)
1574 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1575 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1577 else if (flag_pic)
1578 emit_insn (gen_pic (ops[0], ops[1]));
1579 if (flag_pic)
1581 rtx pic_reg = get_pic_reg ();
1582 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1584 return flag_pic || c == IC_IL2s;
1586 break;
1587 case IC_IL1:
1588 case IC_FSMBI:
1589 case IC_CPAT:
1590 break;
1592 return 0;
1595 /* SAVING is TRUE when we are generating the actual load and store
1596 instructions for REGNO. When determining the size of the stack
1597 needed for saving register we must allocate enough space for the
1598 worst case, because we don't always have the information early enough
1599 to not allocate it. But we can at least eliminate the actual loads
1600 and stores during the prologue/epilogue. */
1601 static int
1602 need_to_save_reg (int regno, int saving)
1604 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1605 return 1;
1606 if (flag_pic
1607 && regno == PIC_OFFSET_TABLE_REGNUM
1608 && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))
1609 return 1;
1610 return 0;
1613 /* This function is only correct starting with local register
1614 allocation */
1616 spu_saved_regs_size (void)
1618 int reg_save_size = 0;
1619 int regno;
1621 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1622 if (need_to_save_reg (regno, 0))
1623 reg_save_size += 0x10;
1624 return reg_save_size;
1627 static rtx_insn *
1628 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1630 rtx reg = gen_rtx_REG (V4SImode, regno);
1631 rtx mem =
1632 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1633 return emit_insn (gen_movv4si (mem, reg));
1636 static rtx_insn *
1637 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1639 rtx reg = gen_rtx_REG (V4SImode, regno);
1640 rtx mem =
1641 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1642 return emit_insn (gen_movv4si (reg, mem));
1645 /* This happens after reload, so we need to expand it. */
1646 static rtx_insn *
1647 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1649 rtx_insn *insn;
1650 if (satisfies_constraint_K (GEN_INT (imm)))
1652 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1654 else
1656 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1657 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1658 if (REGNO (src) == REGNO (scratch))
1659 abort ();
1661 return insn;
1664 /* Return nonzero if this function is known to have a null epilogue. */
1667 direct_return (void)
1669 if (reload_completed)
1671 if (cfun->static_chain_decl == 0
1672 && (spu_saved_regs_size ()
1673 + get_frame_size ()
1674 + crtl->outgoing_args_size
1675 + crtl->args.pretend_args_size == 0)
1676 && crtl->is_leaf)
1677 return 1;
1679 return 0;
1683 The stack frame looks like this:
1684 +-------------+
1685 | incoming |
1686 | args |
1687 AP -> +-------------+
1688 | $lr save |
1689 +-------------+
1690 prev SP | back chain |
1691 +-------------+
1692 | var args |
1693 | reg save | crtl->args.pretend_args_size bytes
1694 +-------------+
1695 | ... |
1696 | saved regs | spu_saved_regs_size() bytes
1697 FP -> +-------------+
1698 | ... |
1699 | vars | get_frame_size() bytes
1700 HFP -> +-------------+
1701 | ... |
1702 | outgoing |
1703 | args | crtl->outgoing_args_size bytes
1704 +-------------+
1705 | $lr of next |
1706 | frame |
1707 +-------------+
1708 | back chain |
1709 SP -> +-------------+
1712 void
1713 spu_expand_prologue (void)
1715 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1716 HOST_WIDE_INT total_size;
1717 HOST_WIDE_INT saved_regs_size;
1718 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1719 rtx scratch_reg_0, scratch_reg_1;
1720 rtx_insn *insn;
1721 rtx real;
1723 if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
1724 cfun->machine->pic_reg = pic_offset_table_rtx;
1726 if (spu_naked_function_p (current_function_decl))
1727 return;
1729 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1730 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1732 saved_regs_size = spu_saved_regs_size ();
1733 total_size = size + saved_regs_size
1734 + crtl->outgoing_args_size
1735 + crtl->args.pretend_args_size;
1737 if (!crtl->is_leaf
1738 || cfun->calls_alloca || total_size > 0)
1739 total_size += STACK_POINTER_OFFSET;
1741 /* Save this first because code after this might use the link
1742 register as a scratch register. */
1743 if (!crtl->is_leaf)
1745 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1746 RTX_FRAME_RELATED_P (insn) = 1;
1749 if (total_size > 0)
1751 offset = -crtl->args.pretend_args_size;
1752 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1753 if (need_to_save_reg (regno, 1))
1755 offset -= 16;
1756 insn = frame_emit_store (regno, sp_reg, offset);
1757 RTX_FRAME_RELATED_P (insn) = 1;
1761 if (flag_pic && cfun->machine->pic_reg)
1763 rtx pic_reg = cfun->machine->pic_reg;
1764 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1765 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1768 if (total_size > 0)
1770 if (flag_stack_check || flag_stack_clash_protection)
1772 /* We compare against total_size-1 because
1773 ($sp >= total_size) <=> ($sp > total_size-1) */
1774 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1775 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1776 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1777 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1779 emit_move_insn (scratch_v4si, size_v4si);
1780 size_v4si = scratch_v4si;
1782 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1783 emit_insn (gen_vec_extractv4sisi
1784 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1785 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1788 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1789 the value of the previous $sp because we save it as the back
1790 chain. */
1791 if (total_size <= 2000)
1793 /* In this case we save the back chain first. */
1794 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1795 insn =
1796 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1798 else
1800 insn = emit_move_insn (scratch_reg_0, sp_reg);
1801 insn =
1802 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1804 RTX_FRAME_RELATED_P (insn) = 1;
1805 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1806 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1808 if (total_size > 2000)
1810 /* Save the back chain ptr */
1811 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1814 if (frame_pointer_needed)
1816 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1817 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1818 + crtl->outgoing_args_size;
1819 /* Set the new frame_pointer */
1820 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1821 RTX_FRAME_RELATED_P (insn) = 1;
1822 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1823 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1824 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1828 if (flag_stack_usage_info)
1829 current_function_static_stack_size = total_size;
1832 void
1833 spu_expand_epilogue (bool sibcall_p)
1835 int size = get_frame_size (), offset, regno;
1836 HOST_WIDE_INT saved_regs_size, total_size;
1837 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1838 rtx scratch_reg_0;
1840 if (spu_naked_function_p (current_function_decl))
1841 return;
1843 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1845 saved_regs_size = spu_saved_regs_size ();
1846 total_size = size + saved_regs_size
1847 + crtl->outgoing_args_size
1848 + crtl->args.pretend_args_size;
1850 if (!crtl->is_leaf
1851 || cfun->calls_alloca || total_size > 0)
1852 total_size += STACK_POINTER_OFFSET;
1854 if (total_size > 0)
1856 if (cfun->calls_alloca)
1857 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1858 else
1859 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1862 if (saved_regs_size > 0)
1864 offset = -crtl->args.pretend_args_size;
1865 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1866 if (need_to_save_reg (regno, 1))
1868 offset -= 0x10;
1869 frame_emit_load (regno, sp_reg, offset);
1874 if (!crtl->is_leaf)
1875 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1877 if (!sibcall_p)
1879 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1880 emit_jump_insn (gen__return ());
1885 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1887 if (count != 0)
1888 return 0;
1889 /* This is inefficient because it ends up copying to a save-register
1890 which then gets saved even though $lr has already been saved. But
1891 it does generate better code for leaf functions and we don't need
1892 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1893 used for __builtin_return_address anyway, so maybe we don't care if
1894 it's inefficient. */
1895 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1899 /* Given VAL, generate a constant appropriate for MODE.
1900 If MODE is a vector mode, every element will be VAL.
1901 For TImode, VAL will be zero extended to 128 bits. */
1903 spu_const (machine_mode mode, HOST_WIDE_INT val)
1905 rtx inner;
1907 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1908 || GET_MODE_CLASS (mode) == MODE_FLOAT
1909 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1910 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1912 if (GET_MODE_CLASS (mode) == MODE_INT)
1913 return immed_double_const (val, 0, mode);
1915 /* val is the bit representation of the float */
1916 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1917 return hwint_to_const_double (mode, val);
1919 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1920 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1921 else
1922 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1924 return gen_const_vec_duplicate (mode, inner);
1927 /* Create a MODE vector constant from 4 ints. */
1929 spu_const_from_ints(machine_mode mode, int a, int b, int c, int d)
1931 unsigned char arr[16];
1932 arr[0] = (a >> 24) & 0xff;
1933 arr[1] = (a >> 16) & 0xff;
1934 arr[2] = (a >> 8) & 0xff;
1935 arr[3] = (a >> 0) & 0xff;
1936 arr[4] = (b >> 24) & 0xff;
1937 arr[5] = (b >> 16) & 0xff;
1938 arr[6] = (b >> 8) & 0xff;
1939 arr[7] = (b >> 0) & 0xff;
1940 arr[8] = (c >> 24) & 0xff;
1941 arr[9] = (c >> 16) & 0xff;
1942 arr[10] = (c >> 8) & 0xff;
1943 arr[11] = (c >> 0) & 0xff;
1944 arr[12] = (d >> 24) & 0xff;
1945 arr[13] = (d >> 16) & 0xff;
1946 arr[14] = (d >> 8) & 0xff;
1947 arr[15] = (d >> 0) & 0xff;
1948 return array_to_constant(mode, arr);
1951 /* branch hint stuff */
1953 /* An array of these is used to propagate hints to predecessor blocks. */
1954 struct spu_bb_info
1956 rtx_insn *prop_jump; /* propagated from another block */
1957 int bb_index; /* the original block. */
1959 static struct spu_bb_info *spu_bb_info;
1961 #define STOP_HINT_P(INSN) \
1962 (CALL_P(INSN) \
1963 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1964 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1966 /* 1 when RTX is a hinted branch or its target. We keep track of
1967 what has been hinted so the safe-hint code can test it easily. */
1968 #define HINTED_P(RTX) \
1969 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1971 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1972 #define SCHED_ON_EVEN_P(RTX) \
1973 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1975 /* Emit a nop for INSN such that the two will dual issue. This assumes
1976 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1977 We check for TImode to handle a MULTI1 insn which has dual issued its
1978 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1979 static void
1980 emit_nop_for_insn (rtx_insn *insn)
1982 int p;
1983 rtx_insn *new_insn;
1985 /* We need to handle JUMP_TABLE_DATA separately. */
1986 if (JUMP_TABLE_DATA_P (insn))
1988 new_insn = emit_insn_after (gen_lnop(), insn);
1989 recog_memoized (new_insn);
1990 INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;
1991 return;
1994 p = get_pipe (insn);
1995 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
1996 new_insn = emit_insn_after (gen_lnop (), insn);
1997 else if (p == 1 && GET_MODE (insn) == TImode)
1999 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2000 PUT_MODE (new_insn, TImode);
2001 PUT_MODE (insn, VOIDmode);
2003 else
2004 new_insn = emit_insn_after (gen_lnop (), insn);
2005 recog_memoized (new_insn);
2006 INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
2009 /* Insert nops in basic blocks to meet dual issue alignment
2010 requirements. Also make sure hbrp and hint instructions are at least
2011 one cycle apart, possibly inserting a nop. */
2012 static void
2013 pad_bb(void)
2015 rtx_insn *insn, *next_insn, *prev_insn, *hbr_insn = 0;
2016 int length;
2017 int addr;
2019 /* This sets up INSN_ADDRESSES. */
2020 shorten_branches (get_insns ());
2022 /* Keep track of length added by nops. */
2023 length = 0;
2025 prev_insn = 0;
2026 insn = get_insns ();
2027 if (!active_insn_p (insn))
2028 insn = next_active_insn (insn);
2029 for (; insn; insn = next_insn)
2031 next_insn = next_active_insn (insn);
2032 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2033 || INSN_CODE (insn) == CODE_FOR_hbr)
2035 if (hbr_insn)
2037 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2038 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2039 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2040 || (a1 - a0 == 4))
2042 prev_insn = emit_insn_before (gen_lnop (), insn);
2043 PUT_MODE (prev_insn, GET_MODE (insn));
2044 PUT_MODE (insn, TImode);
2045 INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);
2046 length += 4;
2049 hbr_insn = insn;
2051 if (INSN_CODE (insn) == CODE_FOR_blockage && next_insn)
2053 if (GET_MODE (insn) == TImode)
2054 PUT_MODE (next_insn, TImode);
2055 insn = next_insn;
2056 next_insn = next_active_insn (insn);
2058 addr = INSN_ADDRESSES (INSN_UID (insn));
2059 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2061 if (((addr + length) & 7) != 0)
2063 emit_nop_for_insn (prev_insn);
2064 length += 4;
2067 else if (GET_MODE (insn) == TImode
2068 && ((next_insn && GET_MODE (next_insn) != TImode)
2069 || get_attr_type (insn) == TYPE_MULTI0)
2070 && ((addr + length) & 7) != 0)
2072 /* prev_insn will always be set because the first insn is
2073 always 8-byte aligned. */
2074 emit_nop_for_insn (prev_insn);
2075 length += 4;
2077 prev_insn = insn;
2082 /* Routines for branch hints. */
2084 static void
2085 spu_emit_branch_hint (rtx_insn *before, rtx_insn *branch, rtx target,
2086 int distance, sbitmap blocks)
2088 rtx_insn *hint;
2089 rtx_insn *insn;
2090 rtx_jump_table_data *table;
2092 if (before == 0 || branch == 0 || target == 0)
2093 return;
2095 /* While scheduling we require hints to be no further than 600, so
2096 we need to enforce that here too */
2097 if (distance > 600)
2098 return;
2100 /* If we have a Basic block note, emit it after the basic block note. */
2101 if (NOTE_INSN_BASIC_BLOCK_P (before))
2102 before = NEXT_INSN (before);
2104 rtx_code_label *branch_label = gen_label_rtx ();
2105 LABEL_NUSES (branch_label)++;
2106 LABEL_PRESERVE_P (branch_label) = 1;
2107 insn = emit_label_before (branch_label, branch);
2108 rtx branch_label_ref = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2109 bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);
2111 hint = emit_insn_before (gen_hbr (branch_label_ref, target), before);
2112 recog_memoized (hint);
2113 INSN_LOCATION (hint) = INSN_LOCATION (branch);
2114 HINTED_P (branch) = 1;
2116 if (GET_CODE (target) == LABEL_REF)
2117 HINTED_P (XEXP (target, 0)) = 1;
2118 else if (tablejump_p (branch, 0, &table))
2120 rtvec vec;
2121 int j;
2122 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2123 vec = XVEC (PATTERN (table), 0);
2124 else
2125 vec = XVEC (PATTERN (table), 1);
2126 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2127 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2130 if (distance >= 588)
2132 /* Make sure the hint isn't scheduled any earlier than this point,
2133 which could make it too far for the branch offest to fit */
2134 insn = emit_insn_before (gen_blockage (), hint);
2135 recog_memoized (insn);
2136 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2138 else if (distance <= 8 * 4)
2140 /* To guarantee at least 8 insns between the hint and branch we
2141 insert nops. */
2142 int d;
2143 for (d = distance; d < 8 * 4; d += 4)
2145 insn =
2146 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2147 recog_memoized (insn);
2148 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2151 /* Make sure any nops inserted aren't scheduled before the hint. */
2152 insn = emit_insn_after (gen_blockage (), hint);
2153 recog_memoized (insn);
2154 INSN_LOCATION (insn) = INSN_LOCATION (hint);
2156 /* Make sure any nops inserted aren't scheduled after the call. */
2157 if (CALL_P (branch) && distance < 8 * 4)
2159 insn = emit_insn_before (gen_blockage (), branch);
2160 recog_memoized (insn);
2161 INSN_LOCATION (insn) = INSN_LOCATION (branch);
2166 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2167 the rtx for the branch target. */
2168 static rtx
2169 get_branch_target (rtx_insn *branch)
2171 if (JUMP_P (branch))
2173 rtx set, src;
2175 /* Return statements */
2176 if (GET_CODE (PATTERN (branch)) == RETURN)
2177 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2179 /* ASM GOTOs. */
2180 if (extract_asm_operands (PATTERN (branch)) != NULL)
2181 return NULL;
2183 set = single_set (branch);
2184 src = SET_SRC (set);
2185 if (GET_CODE (SET_DEST (set)) != PC)
2186 abort ();
2188 if (GET_CODE (src) == IF_THEN_ELSE)
2190 rtx lab = 0;
2191 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2192 if (note)
2194 /* If the more probable case is not a fall through, then
2195 try a branch hint. */
2196 int prob = profile_probability::from_reg_br_prob_note
2197 (XINT (note, 0)).to_reg_br_prob_base ();
2198 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2199 && GET_CODE (XEXP (src, 1)) != PC)
2200 lab = XEXP (src, 1);
2201 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2202 && GET_CODE (XEXP (src, 2)) != PC)
2203 lab = XEXP (src, 2);
2205 if (lab)
2207 if (GET_CODE (lab) == RETURN)
2208 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2209 return lab;
2211 return 0;
2214 return src;
2216 else if (CALL_P (branch))
2218 rtx call;
2219 /* All of our call patterns are in a PARALLEL and the CALL is
2220 the first pattern in the PARALLEL. */
2221 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2222 abort ();
2223 call = XVECEXP (PATTERN (branch), 0, 0);
2224 if (GET_CODE (call) == SET)
2225 call = SET_SRC (call);
2226 if (GET_CODE (call) != CALL)
2227 abort ();
2228 return XEXP (XEXP (call, 0), 0);
2230 return 0;
2233 /* The special $hbr register is used to prevent the insn scheduler from
2234 moving hbr insns across instructions which invalidate them. It
2235 should only be used in a clobber, and this function searches for
2236 insns which clobber it. */
2237 static bool
2238 insn_clobbers_hbr (rtx_insn *insn)
2240 if (INSN_P (insn)
2241 && GET_CODE (PATTERN (insn)) == PARALLEL)
2243 rtx parallel = PATTERN (insn);
2244 rtx clobber;
2245 int j;
2246 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2248 clobber = XVECEXP (parallel, 0, j);
2249 if (GET_CODE (clobber) == CLOBBER
2250 && GET_CODE (XEXP (clobber, 0)) == REG
2251 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2252 return 1;
2255 return 0;
2258 /* Search up to 32 insns starting at FIRST:
2259 - at any kind of hinted branch, just return
2260 - at any unconditional branch in the first 15 insns, just return
2261 - at a call or indirect branch, after the first 15 insns, force it to
2262 an even address and return
2263 - at any unconditional branch, after the first 15 insns, force it to
2264 an even address.
2265 At then end of the search, insert an hbrp within 4 insns of FIRST,
2266 and an hbrp within 16 instructions of FIRST.
2268 static void
2269 insert_hbrp_for_ilb_runout (rtx_insn *first)
2271 rtx_insn *insn, *before_4 = 0, *before_16 = 0;
2272 int addr = 0, length, first_addr = -1;
2273 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2274 int insert_lnop_after = 0;
2275 for (insn = first; insn; insn = NEXT_INSN (insn))
2276 if (INSN_P (insn))
2278 if (first_addr == -1)
2279 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2280 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2281 length = get_attr_length (insn);
2283 if (before_4 == 0 && addr + length >= 4 * 4)
2284 before_4 = insn;
2285 /* We test for 14 instructions because the first hbrp will add
2286 up to 2 instructions. */
2287 if (before_16 == 0 && addr + length >= 14 * 4)
2288 before_16 = insn;
2290 if (INSN_CODE (insn) == CODE_FOR_hbr)
2292 /* Make sure an hbrp is at least 2 cycles away from a hint.
2293 Insert an lnop after the hbrp when necessary. */
2294 if (before_4 == 0 && addr > 0)
2296 before_4 = insn;
2297 insert_lnop_after |= 1;
2299 else if (before_4 && addr <= 4 * 4)
2300 insert_lnop_after |= 1;
2301 if (before_16 == 0 && addr > 10 * 4)
2303 before_16 = insn;
2304 insert_lnop_after |= 2;
2306 else if (before_16 && addr <= 14 * 4)
2307 insert_lnop_after |= 2;
2310 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2312 if (addr < hbrp_addr0)
2313 hbrp_addr0 = addr;
2314 else if (addr < hbrp_addr1)
2315 hbrp_addr1 = addr;
2318 if (CALL_P (insn) || JUMP_P (insn))
2320 if (HINTED_P (insn))
2321 return;
2323 /* Any branch after the first 15 insns should be on an even
2324 address to avoid a special case branch. There might be
2325 some nops and/or hbrps inserted, so we test after 10
2326 insns. */
2327 if (addr > 10 * 4)
2328 SCHED_ON_EVEN_P (insn) = 1;
2331 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2332 return;
2335 if (addr + length >= 32 * 4)
2337 gcc_assert (before_4 && before_16);
2338 if (hbrp_addr0 > 4 * 4)
2340 insn =
2341 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2342 recog_memoized (insn);
2343 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2344 INSN_ADDRESSES_NEW (insn,
2345 INSN_ADDRESSES (INSN_UID (before_4)));
2346 PUT_MODE (insn, GET_MODE (before_4));
2347 PUT_MODE (before_4, TImode);
2348 if (insert_lnop_after & 1)
2350 insn = emit_insn_before (gen_lnop (), before_4);
2351 recog_memoized (insn);
2352 INSN_LOCATION (insn) = INSN_LOCATION (before_4);
2353 INSN_ADDRESSES_NEW (insn,
2354 INSN_ADDRESSES (INSN_UID (before_4)));
2355 PUT_MODE (insn, TImode);
2358 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2359 && hbrp_addr1 > 16 * 4)
2361 insn =
2362 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2363 recog_memoized (insn);
2364 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2365 INSN_ADDRESSES_NEW (insn,
2366 INSN_ADDRESSES (INSN_UID (before_16)));
2367 PUT_MODE (insn, GET_MODE (before_16));
2368 PUT_MODE (before_16, TImode);
2369 if (insert_lnop_after & 2)
2371 insn = emit_insn_before (gen_lnop (), before_16);
2372 recog_memoized (insn);
2373 INSN_LOCATION (insn) = INSN_LOCATION (before_16);
2374 INSN_ADDRESSES_NEW (insn,
2375 INSN_ADDRESSES (INSN_UID
2376 (before_16)));
2377 PUT_MODE (insn, TImode);
2380 return;
2383 else if (BARRIER_P (insn))
2384 return;
2388 /* The SPU might hang when it executes 48 inline instructions after a
2389 hinted branch jumps to its hinted target. The beginning of a
2390 function and the return from a call might have been hinted, and
2391 must be handled as well. To prevent a hang we insert 2 hbrps. The
2392 first should be within 6 insns of the branch target. The second
2393 should be within 22 insns of the branch target. When determining
2394 if hbrps are necessary, we look for only 32 inline instructions,
2395 because up to 12 nops and 4 hbrps could be inserted. Similarily,
2396 when inserting new hbrps, we insert them within 4 and 16 insns of
2397 the target. */
2398 static void
2399 insert_hbrp (void)
2401 rtx_insn *insn;
2402 if (TARGET_SAFE_HINTS)
2404 shorten_branches (get_insns ());
2405 /* Insert hbrp at beginning of function */
2406 insn = next_active_insn (get_insns ());
2407 if (insn)
2408 insert_hbrp_for_ilb_runout (insn);
2409 /* Insert hbrp after hinted targets. */
2410 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2411 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2412 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2416 static int in_spu_reorg;
2418 static void
2419 spu_var_tracking (void)
2421 if (flag_var_tracking)
2423 df_analyze ();
2424 timevar_push (TV_VAR_TRACKING);
2425 variable_tracking_main ();
2426 timevar_pop (TV_VAR_TRACKING);
2427 df_finish_pass (false);
2431 /* Insert branch hints. There are no branch optimizations after this
2432 pass, so it's safe to set our branch hints now. */
2433 static void
2434 spu_machine_dependent_reorg (void)
2436 sbitmap blocks;
2437 basic_block bb;
2438 rtx_insn *branch, *insn;
2439 rtx branch_target = 0;
2440 int branch_addr = 0, insn_addr, required_dist = 0;
2441 int i;
2442 unsigned int j;
2444 if (!TARGET_BRANCH_HINTS || optimize == 0)
2446 /* We still do it for unoptimized code because an external
2447 function might have hinted a call or return. */
2448 compute_bb_for_insn ();
2449 insert_hbrp ();
2450 pad_bb ();
2451 spu_var_tracking ();
2452 free_bb_for_insn ();
2453 return;
2456 blocks = sbitmap_alloc (last_basic_block_for_fn (cfun));
2457 bitmap_clear (blocks);
2459 in_spu_reorg = 1;
2460 compute_bb_for_insn ();
2462 /* (Re-)discover loops so that bb->loop_father can be used
2463 in the analysis below. */
2464 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
2466 compact_blocks ();
2468 spu_bb_info =
2469 (struct spu_bb_info *) xcalloc (n_basic_blocks_for_fn (cfun),
2470 sizeof (struct spu_bb_info));
2472 /* We need exact insn addresses and lengths. */
2473 shorten_branches (get_insns ());
2475 for (i = n_basic_blocks_for_fn (cfun) - 1; i >= 0; i--)
2477 bb = BASIC_BLOCK_FOR_FN (cfun, i);
2478 branch = 0;
2479 if (spu_bb_info[i].prop_jump)
2481 branch = spu_bb_info[i].prop_jump;
2482 branch_target = get_branch_target (branch);
2483 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2484 required_dist = spu_hint_dist;
2486 /* Search from end of a block to beginning. In this loop, find
2487 jumps which need a branch and emit them only when:
2488 - it's an indirect branch and we're at the insn which sets
2489 the register
2490 - we're at an insn that will invalidate the hint. e.g., a
2491 call, another hint insn, inline asm that clobbers $hbr, and
2492 some inlined operations (divmodsi4). Don't consider jumps
2493 because they are only at the end of a block and are
2494 considered when we are deciding whether to propagate
2495 - we're getting too far away from the branch. The hbr insns
2496 only have a signed 10 bit offset
2497 We go back as far as possible so the branch will be considered
2498 for propagation when we get to the beginning of the block. */
2499 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2501 if (INSN_P (insn))
2503 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2504 if (branch
2505 && ((GET_CODE (branch_target) == REG
2506 && set_of (branch_target, insn) != NULL_RTX)
2507 || insn_clobbers_hbr (insn)
2508 || branch_addr - insn_addr > 600))
2510 rtx_insn *next = NEXT_INSN (insn);
2511 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2512 if (insn != BB_END (bb)
2513 && branch_addr - next_addr >= required_dist)
2515 if (dump_file)
2516 fprintf (dump_file,
2517 "hint for %i in block %i before %i\n",
2518 INSN_UID (branch), bb->index,
2519 INSN_UID (next));
2520 spu_emit_branch_hint (next, branch, branch_target,
2521 branch_addr - next_addr, blocks);
2523 branch = 0;
2526 /* JUMP_P will only be true at the end of a block. When
2527 branch is already set it means we've previously decided
2528 to propagate a hint for that branch into this block. */
2529 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2531 branch = 0;
2532 if ((branch_target = get_branch_target (insn)))
2534 branch = insn;
2535 branch_addr = insn_addr;
2536 required_dist = spu_hint_dist;
2540 if (insn == BB_HEAD (bb))
2541 break;
2544 if (branch)
2546 /* If we haven't emitted a hint for this branch yet, it might
2547 be profitable to emit it in one of the predecessor blocks,
2548 especially for loops. */
2549 rtx_insn *bbend;
2550 basic_block prev = 0, prop = 0, prev2 = 0;
2551 int loop_exit = 0, simple_loop = 0;
2552 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2554 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2555 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2556 prev = EDGE_PRED (bb, j)->src;
2557 else
2558 prev2 = EDGE_PRED (bb, j)->src;
2560 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2561 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2562 loop_exit = 1;
2563 else if (EDGE_SUCC (bb, j)->dest == bb)
2564 simple_loop = 1;
2566 /* If this branch is a loop exit then propagate to previous
2567 fallthru block. This catches the cases when it is a simple
2568 loop or when there is an initial branch into the loop. */
2569 if (prev && (loop_exit || simple_loop)
2570 && bb_loop_depth (prev) <= bb_loop_depth (bb))
2571 prop = prev;
2573 /* If there is only one adjacent predecessor. Don't propagate
2574 outside this loop. */
2575 else if (prev && single_pred_p (bb)
2576 && prev->loop_father == bb->loop_father)
2577 prop = prev;
2579 /* If this is the JOIN block of a simple IF-THEN then
2580 propagate the hint to the HEADER block. */
2581 else if (prev && prev2
2582 && EDGE_COUNT (bb->preds) == 2
2583 && EDGE_COUNT (prev->preds) == 1
2584 && EDGE_PRED (prev, 0)->src == prev2
2585 && prev2->loop_father == bb->loop_father
2586 && GET_CODE (branch_target) != REG)
2587 prop = prev;
2589 /* Don't propagate when:
2590 - this is a simple loop and the hint would be too far
2591 - this is not a simple loop and there are 16 insns in
2592 this block already
2593 - the predecessor block ends in a branch that will be
2594 hinted
2595 - the predecessor block ends in an insn that invalidates
2596 the hint */
2597 if (prop
2598 && prop->index >= 0
2599 && (bbend = BB_END (prop))
2600 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2601 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2602 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2604 if (dump_file)
2605 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2606 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2607 bb->index, prop->index, bb_loop_depth (bb),
2608 INSN_UID (branch), loop_exit, simple_loop,
2609 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2611 spu_bb_info[prop->index].prop_jump = branch;
2612 spu_bb_info[prop->index].bb_index = i;
2614 else if (branch_addr - next_addr >= required_dist)
2616 if (dump_file)
2617 fprintf (dump_file, "hint for %i in block %i before %i\n",
2618 INSN_UID (branch), bb->index,
2619 INSN_UID (NEXT_INSN (insn)));
2620 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2621 branch_addr - next_addr, blocks);
2623 branch = 0;
2626 free (spu_bb_info);
2628 if (!bitmap_empty_p (blocks))
2629 find_many_sub_basic_blocks (blocks);
2631 /* We have to schedule to make sure alignment is ok. */
2632 FOR_EACH_BB_FN (bb, cfun) bb->flags &= ~BB_DISABLE_SCHEDULE;
2634 /* The hints need to be scheduled, so call it again. */
2635 schedule_insns ();
2636 df_finish_pass (true);
2638 insert_hbrp ();
2640 pad_bb ();
2642 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2643 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2645 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2646 between its branch label and the branch . We don't move the
2647 label because GCC expects it at the beginning of the block. */
2648 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2649 rtx label_ref = XVECEXP (unspec, 0, 0);
2650 rtx_insn *label = as_a <rtx_insn *> (XEXP (label_ref, 0));
2651 rtx_insn *branch;
2652 int offset = 0;
2653 for (branch = NEXT_INSN (label);
2654 !JUMP_P (branch) && !CALL_P (branch);
2655 branch = NEXT_INSN (branch))
2656 if (NONJUMP_INSN_P (branch))
2657 offset += get_attr_length (branch);
2658 if (offset > 0)
2659 XVECEXP (unspec, 0, 0) = plus_constant (Pmode, label_ref, offset);
2662 spu_var_tracking ();
2664 loop_optimizer_finalize ();
2666 free_bb_for_insn ();
2668 in_spu_reorg = 0;
2672 /* Insn scheduling routines, primarily for dual issue. */
2673 static int
2674 spu_sched_issue_rate (void)
2676 return 2;
2679 static int
2680 uses_ls_unit(rtx_insn *insn)
2682 rtx set = single_set (insn);
2683 if (set != 0
2684 && (GET_CODE (SET_DEST (set)) == MEM
2685 || GET_CODE (SET_SRC (set)) == MEM))
2686 return 1;
2687 return 0;
2690 static int
2691 get_pipe (rtx_insn *insn)
2693 enum attr_type t;
2694 /* Handle inline asm */
2695 if (INSN_CODE (insn) == -1)
2696 return -1;
2697 t = get_attr_type (insn);
2698 switch (t)
2700 case TYPE_CONVERT:
2701 return -2;
2702 case TYPE_MULTI0:
2703 return -1;
2705 case TYPE_FX2:
2706 case TYPE_FX3:
2707 case TYPE_SPR:
2708 case TYPE_NOP:
2709 case TYPE_FXB:
2710 case TYPE_FPD:
2711 case TYPE_FP6:
2712 case TYPE_FP7:
2713 return 0;
2715 case TYPE_LNOP:
2716 case TYPE_SHUF:
2717 case TYPE_LOAD:
2718 case TYPE_STORE:
2719 case TYPE_BR:
2720 case TYPE_MULTI1:
2721 case TYPE_HBR:
2722 case TYPE_IPREFETCH:
2723 return 1;
2724 default:
2725 abort ();
2730 /* haifa-sched.c has a static variable that keeps track of the current
2731 cycle. It is passed to spu_sched_reorder, and we record it here for
2732 use by spu_sched_variable_issue. It won't be accurate if the
2733 scheduler updates it's clock_var between the two calls. */
2734 static int clock_var;
2736 /* This is used to keep track of insn alignment. Set to 0 at the
2737 beginning of each block and increased by the "length" attr of each
2738 insn scheduled. */
2739 static int spu_sched_length;
2741 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2742 ready list appropriately in spu_sched_reorder(). */
2743 static int pipe0_clock;
2744 static int pipe1_clock;
2746 static int prev_clock_var;
2748 static int prev_priority;
2750 /* The SPU needs to load the next ilb sometime during the execution of
2751 the previous ilb. There is a potential conflict if every cycle has a
2752 load or store. To avoid the conflict we make sure the load/store
2753 unit is free for at least one cycle during the execution of insns in
2754 the previous ilb. */
2755 static int spu_ls_first;
2756 static int prev_ls_clock;
2758 static void
2759 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2760 int max_ready ATTRIBUTE_UNUSED)
2762 spu_sched_length = 0;
2765 static void
2766 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2767 int max_ready ATTRIBUTE_UNUSED)
2769 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2771 /* When any block might be at least 8-byte aligned, assume they
2772 will all be at least 8-byte aligned to make sure dual issue
2773 works out correctly. */
2774 spu_sched_length = 0;
2776 spu_ls_first = INT_MAX;
2777 clock_var = -1;
2778 prev_ls_clock = -1;
2779 pipe0_clock = -1;
2780 pipe1_clock = -1;
2781 prev_clock_var = -1;
2782 prev_priority = -1;
2785 static int
2786 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2787 int verbose ATTRIBUTE_UNUSED,
2788 rtx_insn *insn, int more)
2790 int len;
2791 int p;
2792 if (GET_CODE (PATTERN (insn)) == USE
2793 || GET_CODE (PATTERN (insn)) == CLOBBER
2794 || (len = get_attr_length (insn)) == 0)
2795 return more;
2797 spu_sched_length += len;
2799 /* Reset on inline asm */
2800 if (INSN_CODE (insn) == -1)
2802 spu_ls_first = INT_MAX;
2803 pipe0_clock = -1;
2804 pipe1_clock = -1;
2805 return 0;
2807 p = get_pipe (insn);
2808 if (p == 0)
2809 pipe0_clock = clock_var;
2810 else
2811 pipe1_clock = clock_var;
2813 if (in_spu_reorg)
2815 if (clock_var - prev_ls_clock > 1
2816 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2817 spu_ls_first = INT_MAX;
2818 if (uses_ls_unit (insn))
2820 if (spu_ls_first == INT_MAX)
2821 spu_ls_first = spu_sched_length;
2822 prev_ls_clock = clock_var;
2825 /* The scheduler hasn't inserted the nop, but we will later on.
2826 Include those nops in spu_sched_length. */
2827 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2828 spu_sched_length += 4;
2829 prev_clock_var = clock_var;
2831 /* more is -1 when called from spu_sched_reorder for new insns
2832 that don't have INSN_PRIORITY */
2833 if (more >= 0)
2834 prev_priority = INSN_PRIORITY (insn);
2837 /* Always try issuing more insns. spu_sched_reorder will decide
2838 when the cycle should be advanced. */
2839 return 1;
2842 /* This function is called for both TARGET_SCHED_REORDER and
2843 TARGET_SCHED_REORDER2. */
2844 static int
2845 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2846 rtx_insn **ready, int *nreadyp, int clock)
2848 int i, nready = *nreadyp;
2849 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
2850 rtx_insn *insn;
2852 clock_var = clock;
2854 if (nready <= 0 || pipe1_clock >= clock)
2855 return 0;
2857 /* Find any rtl insns that don't generate assembly insns and schedule
2858 them first. */
2859 for (i = nready - 1; i >= 0; i--)
2861 insn = ready[i];
2862 if (INSN_CODE (insn) == -1
2863 || INSN_CODE (insn) == CODE_FOR_blockage
2864 || (INSN_P (insn) && get_attr_length (insn) == 0))
2866 ready[i] = ready[nready - 1];
2867 ready[nready - 1] = insn;
2868 return 1;
2872 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
2873 for (i = 0; i < nready; i++)
2874 if (INSN_CODE (ready[i]) != -1)
2876 insn = ready[i];
2877 switch (get_attr_type (insn))
2879 default:
2880 case TYPE_MULTI0:
2881 case TYPE_CONVERT:
2882 case TYPE_FX2:
2883 case TYPE_FX3:
2884 case TYPE_SPR:
2885 case TYPE_NOP:
2886 case TYPE_FXB:
2887 case TYPE_FPD:
2888 case TYPE_FP6:
2889 case TYPE_FP7:
2890 pipe_0 = i;
2891 break;
2892 case TYPE_LOAD:
2893 case TYPE_STORE:
2894 pipe_ls = i;
2895 /* FALLTHRU */
2896 case TYPE_LNOP:
2897 case TYPE_SHUF:
2898 case TYPE_BR:
2899 case TYPE_MULTI1:
2900 case TYPE_HBR:
2901 pipe_1 = i;
2902 break;
2903 case TYPE_IPREFETCH:
2904 pipe_hbrp = i;
2905 break;
2909 /* In the first scheduling phase, schedule loads and stores together
2910 to increase the chance they will get merged during postreload CSE. */
2911 if (!reload_completed && pipe_ls >= 0)
2913 insn = ready[pipe_ls];
2914 ready[pipe_ls] = ready[nready - 1];
2915 ready[nready - 1] = insn;
2916 return 1;
2919 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2920 if (pipe_hbrp >= 0)
2921 pipe_1 = pipe_hbrp;
2923 /* When we have loads/stores in every cycle of the last 15 insns and
2924 we are about to schedule another load/store, emit an hbrp insn
2925 instead. */
2926 if (in_spu_reorg
2927 && spu_sched_length - spu_ls_first >= 4 * 15
2928 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
2930 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2931 recog_memoized (insn);
2932 if (pipe0_clock < clock)
2933 PUT_MODE (insn, TImode);
2934 spu_sched_variable_issue (file, verbose, insn, -1);
2935 return 0;
2938 /* In general, we want to emit nops to increase dual issue, but dual
2939 issue isn't faster when one of the insns could be scheduled later
2940 without effecting the critical path. We look at INSN_PRIORITY to
2941 make a good guess, but it isn't perfect so -mdual-nops=n can be
2942 used to effect it. */
2943 if (in_spu_reorg && spu_dual_nops < 10)
2945 /* When we are at an even address and we are not issuing nops to
2946 improve scheduling then we need to advance the cycle. */
2947 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
2948 && (spu_dual_nops == 0
2949 || (pipe_1 != -1
2950 && prev_priority >
2951 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
2952 return 0;
2954 /* When at an odd address, schedule the highest priority insn
2955 without considering pipeline. */
2956 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
2957 && (spu_dual_nops == 0
2958 || (prev_priority >
2959 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
2960 return 1;
2964 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2965 pipe0 insn in the ready list, schedule it. */
2966 if (pipe0_clock < clock && pipe_0 >= 0)
2967 schedule_i = pipe_0;
2969 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2970 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2971 else
2972 schedule_i = pipe_1;
2974 if (schedule_i > -1)
2976 insn = ready[schedule_i];
2977 ready[schedule_i] = ready[nready - 1];
2978 ready[nready - 1] = insn;
2979 return 1;
2981 return 0;
2984 /* INSN is dependent on DEP_INSN. */
2985 static int
2986 spu_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
2987 int cost, unsigned int)
2989 rtx set;
2991 /* The blockage pattern is used to prevent instructions from being
2992 moved across it and has no cost. */
2993 if (INSN_CODE (insn) == CODE_FOR_blockage
2994 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
2995 return 0;
2997 if ((INSN_P (insn) && get_attr_length (insn) == 0)
2998 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
2999 return 0;
3001 /* Make sure hbrps are spread out. */
3002 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3003 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3004 return 8;
3006 /* Make sure hints and hbrps are 2 cycles apart. */
3007 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3008 || INSN_CODE (insn) == CODE_FOR_hbr)
3009 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3010 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3011 return 2;
3013 /* An hbrp has no real dependency on other insns. */
3014 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3015 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3016 return 0;
3018 /* Assuming that it is unlikely an argument register will be used in
3019 the first cycle of the called function, we reduce the cost for
3020 slightly better scheduling of dep_insn. When not hinted, the
3021 mispredicted branch would hide the cost as well. */
3022 if (CALL_P (insn))
3024 rtx target = get_branch_target (insn);
3025 if (GET_CODE (target) != REG || !set_of (target, insn))
3026 return cost - 2;
3027 return cost;
3030 /* And when returning from a function, let's assume the return values
3031 are completed sooner too. */
3032 if (CALL_P (dep_insn))
3033 return cost - 2;
3035 /* Make sure an instruction that loads from the back chain is schedule
3036 away from the return instruction so a hint is more likely to get
3037 issued. */
3038 if (INSN_CODE (insn) == CODE_FOR__return
3039 && (set = single_set (dep_insn))
3040 && GET_CODE (SET_DEST (set)) == REG
3041 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3042 return 20;
3044 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3045 scheduler makes every insn in a block anti-dependent on the final
3046 jump_insn. We adjust here so higher cost insns will get scheduled
3047 earlier. */
3048 if (JUMP_P (insn) && dep_type == REG_DEP_ANTI)
3049 return insn_sched_cost (dep_insn) - 3;
3051 return cost;
3054 /* Create a CONST_DOUBLE from a string. */
3056 spu_float_const (const char *string, machine_mode mode)
3058 REAL_VALUE_TYPE value;
3059 value = REAL_VALUE_ATOF (string, mode);
3060 return const_double_from_real_value (value, mode);
3064 spu_constant_address_p (rtx x)
3066 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3067 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3068 || GET_CODE (x) == HIGH);
3071 static enum spu_immediate
3072 which_immediate_load (HOST_WIDE_INT val)
3074 gcc_assert (val == trunc_int_for_mode (val, SImode));
3076 if (val >= -0x8000 && val <= 0x7fff)
3077 return SPU_IL;
3078 if (val >= 0 && val <= 0x3ffff)
3079 return SPU_ILA;
3080 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3081 return SPU_ILH;
3082 if ((val & 0xffff) == 0)
3083 return SPU_ILHU;
3085 return SPU_NONE;
3088 /* Return true when OP can be loaded by one of the il instructions, or
3089 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3091 immediate_load_p (rtx op, machine_mode mode)
3093 if (CONSTANT_P (op))
3095 enum immediate_class c = classify_immediate (op, mode);
3096 return c == IC_IL1 || c == IC_IL1s
3097 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3099 return 0;
3102 /* Return true if the first SIZE bytes of arr is a constant that can be
3103 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3104 represent the size and offset of the instruction to use. */
3105 static int
3106 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3108 int cpat, run, i, start;
3109 cpat = 1;
3110 run = 0;
3111 start = -1;
3112 for (i = 0; i < size && cpat; i++)
3113 if (arr[i] != i+16)
3115 if (!run)
3117 start = i;
3118 if (arr[i] == 3)
3119 run = 1;
3120 else if (arr[i] == 2 && arr[i+1] == 3)
3121 run = 2;
3122 else if (arr[i] == 0)
3124 while (arr[i+run] == run && i+run < 16)
3125 run++;
3126 if (run != 4 && run != 8)
3127 cpat = 0;
3129 else
3130 cpat = 0;
3131 if ((i & (run-1)) != 0)
3132 cpat = 0;
3133 i += run;
3135 else
3136 cpat = 0;
3138 if (cpat && (run || size < 16))
3140 if (run == 0)
3141 run = 1;
3142 if (prun)
3143 *prun = run;
3144 if (pstart)
3145 *pstart = start == -1 ? 16-run : start;
3146 return 1;
3148 return 0;
3151 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3152 it into a register. MODE is only valid when OP is a CONST_INT. */
3153 static enum immediate_class
3154 classify_immediate (rtx op, machine_mode mode)
3156 HOST_WIDE_INT val;
3157 unsigned char arr[16];
3158 int i, j, repeated, fsmbi, repeat;
3160 gcc_assert (CONSTANT_P (op));
3162 if (GET_MODE (op) != VOIDmode)
3163 mode = GET_MODE (op);
3165 /* A V4SI const_vector with all identical symbols is ok. */
3166 if (!flag_pic
3167 && mode == V4SImode
3168 && GET_CODE (op) == CONST_VECTOR
3169 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3170 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE)
3171 op = unwrap_const_vec_duplicate (op);
3173 switch (GET_CODE (op))
3175 case SYMBOL_REF:
3176 case LABEL_REF:
3177 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3179 case CONST:
3180 /* We can never know if the resulting address fits in 18 bits and can be
3181 loaded with ila. For now, assume the address will not overflow if
3182 the displacement is "small" (fits 'K' constraint). */
3183 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3185 rtx sym = XEXP (XEXP (op, 0), 0);
3186 rtx cst = XEXP (XEXP (op, 0), 1);
3188 if (GET_CODE (sym) == SYMBOL_REF
3189 && GET_CODE (cst) == CONST_INT
3190 && satisfies_constraint_K (cst))
3191 return IC_IL1s;
3193 return IC_IL2s;
3195 case HIGH:
3196 return IC_IL1s;
3198 case CONST_VECTOR:
3199 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3200 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3201 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3202 return IC_POOL;
3203 /* Fall through. */
3205 case CONST_INT:
3206 case CONST_DOUBLE:
3207 constant_to_array (mode, op, arr);
3209 /* Check that each 4-byte slot is identical. */
3210 repeated = 1;
3211 for (i = 4; i < 16; i += 4)
3212 for (j = 0; j < 4; j++)
3213 if (arr[j] != arr[i + j])
3214 repeated = 0;
3216 if (repeated)
3218 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3219 val = trunc_int_for_mode (val, SImode);
3221 if (which_immediate_load (val) != SPU_NONE)
3222 return IC_IL1;
3225 /* Any mode of 2 bytes or smaller can be loaded with an il
3226 instruction. */
3227 gcc_assert (GET_MODE_SIZE (mode) > 2);
3229 fsmbi = 1;
3230 repeat = 0;
3231 for (i = 0; i < 16 && fsmbi; i++)
3232 if (arr[i] != 0 && repeat == 0)
3233 repeat = arr[i];
3234 else if (arr[i] != 0 && arr[i] != repeat)
3235 fsmbi = 0;
3236 if (fsmbi)
3237 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3239 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3240 return IC_CPAT;
3242 if (repeated)
3243 return IC_IL2;
3245 return IC_POOL;
3246 default:
3247 break;
3249 gcc_unreachable ();
3252 static enum spu_immediate
3253 which_logical_immediate (HOST_WIDE_INT val)
3255 gcc_assert (val == trunc_int_for_mode (val, SImode));
3257 if (val >= -0x200 && val <= 0x1ff)
3258 return SPU_ORI;
3259 if (val >= 0 && val <= 0xffff)
3260 return SPU_IOHL;
3261 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3263 val = trunc_int_for_mode (val, HImode);
3264 if (val >= -0x200 && val <= 0x1ff)
3265 return SPU_ORHI;
3266 if ((val & 0xff) == ((val >> 8) & 0xff))
3268 val = trunc_int_for_mode (val, QImode);
3269 if (val >= -0x200 && val <= 0x1ff)
3270 return SPU_ORBI;
3273 return SPU_NONE;
3276 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3277 CONST_DOUBLEs. */
3278 static int
3279 const_vector_immediate_p (rtx x)
3281 int i;
3282 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3283 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3284 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3285 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3286 return 0;
3287 return 1;
3291 logical_immediate_p (rtx op, machine_mode mode)
3293 HOST_WIDE_INT val;
3294 unsigned char arr[16];
3295 int i, j;
3297 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3298 || GET_CODE (op) == CONST_VECTOR);
3300 if (GET_CODE (op) == CONST_VECTOR
3301 && !const_vector_immediate_p (op))
3302 return 0;
3304 if (GET_MODE (op) != VOIDmode)
3305 mode = GET_MODE (op);
3307 constant_to_array (mode, op, arr);
3309 /* Check that bytes are repeated. */
3310 for (i = 4; i < 16; i += 4)
3311 for (j = 0; j < 4; j++)
3312 if (arr[j] != arr[i + j])
3313 return 0;
3315 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3316 val = trunc_int_for_mode (val, SImode);
3318 i = which_logical_immediate (val);
3319 return i != SPU_NONE && i != SPU_IOHL;
3323 iohl_immediate_p (rtx op, machine_mode mode)
3325 HOST_WIDE_INT val;
3326 unsigned char arr[16];
3327 int i, j;
3329 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3330 || GET_CODE (op) == CONST_VECTOR);
3332 if (GET_CODE (op) == CONST_VECTOR
3333 && !const_vector_immediate_p (op))
3334 return 0;
3336 if (GET_MODE (op) != VOIDmode)
3337 mode = GET_MODE (op);
3339 constant_to_array (mode, op, arr);
3341 /* Check that bytes are repeated. */
3342 for (i = 4; i < 16; i += 4)
3343 for (j = 0; j < 4; j++)
3344 if (arr[j] != arr[i + j])
3345 return 0;
3347 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3348 val = trunc_int_for_mode (val, SImode);
3350 return val >= 0 && val <= 0xffff;
3354 arith_immediate_p (rtx op, machine_mode mode,
3355 HOST_WIDE_INT low, HOST_WIDE_INT high)
3357 HOST_WIDE_INT val;
3358 unsigned char arr[16];
3359 int bytes, i, j;
3361 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3362 || GET_CODE (op) == CONST_VECTOR);
3364 if (GET_CODE (op) == CONST_VECTOR
3365 && !const_vector_immediate_p (op))
3366 return 0;
3368 if (GET_MODE (op) != VOIDmode)
3369 mode = GET_MODE (op);
3371 constant_to_array (mode, op, arr);
3373 bytes = GET_MODE_UNIT_SIZE (mode);
3374 mode = int_mode_for_mode (GET_MODE_INNER (mode)).require ();
3376 /* Check that bytes are repeated. */
3377 for (i = bytes; i < 16; i += bytes)
3378 for (j = 0; j < bytes; j++)
3379 if (arr[j] != arr[i + j])
3380 return 0;
3382 val = arr[0];
3383 for (j = 1; j < bytes; j++)
3384 val = (val << 8) | arr[j];
3386 val = trunc_int_for_mode (val, mode);
3388 return val >= low && val <= high;
3391 /* TRUE when op is an immediate and an exact power of 2, and given that
3392 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3393 all entries must be the same. */
3394 bool
3395 exp2_immediate_p (rtx op, machine_mode mode, int low, int high)
3397 machine_mode int_mode;
3398 HOST_WIDE_INT val;
3399 unsigned char arr[16];
3400 int bytes, i, j;
3402 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3403 || GET_CODE (op) == CONST_VECTOR);
3405 if (GET_CODE (op) == CONST_VECTOR
3406 && !const_vector_immediate_p (op))
3407 return 0;
3409 if (GET_MODE (op) != VOIDmode)
3410 mode = GET_MODE (op);
3412 constant_to_array (mode, op, arr);
3414 mode = GET_MODE_INNER (mode);
3416 bytes = GET_MODE_SIZE (mode);
3417 int_mode = int_mode_for_mode (mode).require ();
3419 /* Check that bytes are repeated. */
3420 for (i = bytes; i < 16; i += bytes)
3421 for (j = 0; j < bytes; j++)
3422 if (arr[j] != arr[i + j])
3423 return 0;
3425 val = arr[0];
3426 for (j = 1; j < bytes; j++)
3427 val = (val << 8) | arr[j];
3429 val = trunc_int_for_mode (val, int_mode);
3431 /* Currently, we only handle SFmode */
3432 gcc_assert (mode == SFmode);
3433 if (mode == SFmode)
3435 int exp = (val >> 23) - 127;
3436 return val > 0 && (val & 0x007fffff) == 0
3437 && exp >= low && exp <= high;
3439 return FALSE;
3442 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3444 static bool
3445 ea_symbol_ref_p (const_rtx x)
3447 tree decl;
3449 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3451 rtx plus = XEXP (x, 0);
3452 rtx op0 = XEXP (plus, 0);
3453 rtx op1 = XEXP (plus, 1);
3454 if (GET_CODE (op1) == CONST_INT)
3455 x = op0;
3458 return (GET_CODE (x) == SYMBOL_REF
3459 && (decl = SYMBOL_REF_DECL (x)) != 0
3460 && TREE_CODE (decl) == VAR_DECL
3461 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3464 /* We accept:
3465 - any 32-bit constant (SImode, SFmode)
3466 - any constant that can be generated with fsmbi (any mode)
3467 - a 64-bit constant where the high and low bits are identical
3468 (DImode, DFmode)
3469 - a 128-bit constant where the four 32-bit words match. */
3470 bool
3471 spu_legitimate_constant_p (machine_mode mode, rtx x)
3473 subrtx_iterator::array_type array;
3474 if (GET_CODE (x) == HIGH)
3475 x = XEXP (x, 0);
3477 /* Reject any __ea qualified reference. These can't appear in
3478 instructions but must be forced to the constant pool. */
3479 FOR_EACH_SUBRTX (iter, array, x, ALL)
3480 if (ea_symbol_ref_p (*iter))
3481 return 0;
3483 /* V4SI with all identical symbols is valid. */
3484 if (!flag_pic
3485 && mode == V4SImode
3486 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3487 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3488 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3489 return const_vec_duplicate_p (x);
3491 if (GET_CODE (x) == CONST_VECTOR
3492 && !const_vector_immediate_p (x))
3493 return 0;
3494 return 1;
3497 /* Valid address are:
3498 - symbol_ref, label_ref, const
3499 - reg
3500 - reg + const_int, where const_int is 16 byte aligned
3501 - reg + reg, alignment doesn't matter
3502 The alignment matters in the reg+const case because lqd and stqd
3503 ignore the 4 least significant bits of the const. We only care about
3504 16 byte modes because the expand phase will change all smaller MEM
3505 references to TImode. */
3506 static bool
3507 spu_legitimate_address_p (machine_mode mode,
3508 rtx x, bool reg_ok_strict)
3510 int aligned = GET_MODE_SIZE (mode) >= 16;
3511 if (aligned
3512 && GET_CODE (x) == AND
3513 && GET_CODE (XEXP (x, 1)) == CONST_INT
3514 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3515 x = XEXP (x, 0);
3516 switch (GET_CODE (x))
3518 case LABEL_REF:
3519 return !TARGET_LARGE_MEM;
3521 case SYMBOL_REF:
3522 case CONST:
3523 /* Keep __ea references until reload so that spu_expand_mov can see them
3524 in MEMs. */
3525 if (ea_symbol_ref_p (x))
3526 return !reload_in_progress && !reload_completed;
3527 return !TARGET_LARGE_MEM;
3529 case CONST_INT:
3530 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3532 case SUBREG:
3533 x = XEXP (x, 0);
3534 if (!REG_P (x))
3535 return 0;
3536 /* FALLTHRU */
3538 case REG:
3539 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3541 case PLUS:
3542 case LO_SUM:
3544 rtx op0 = XEXP (x, 0);
3545 rtx op1 = XEXP (x, 1);
3546 if (GET_CODE (op0) == SUBREG)
3547 op0 = XEXP (op0, 0);
3548 if (GET_CODE (op1) == SUBREG)
3549 op1 = XEXP (op1, 0);
3550 if (GET_CODE (op0) == REG
3551 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3552 && GET_CODE (op1) == CONST_INT
3553 && ((INTVAL (op1) >= -0x2000 && INTVAL (op1) <= 0x1fff)
3554 /* If virtual registers are involved, the displacement will
3555 change later on anyway, so checking would be premature.
3556 Reload will make sure the final displacement after
3557 register elimination is OK. */
3558 || op0 == arg_pointer_rtx
3559 || op0 == frame_pointer_rtx
3560 || op0 == virtual_stack_vars_rtx)
3561 && (!aligned || (INTVAL (op1) & 15) == 0))
3562 return TRUE;
3563 if (GET_CODE (op0) == REG
3564 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3565 && GET_CODE (op1) == REG
3566 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3567 return TRUE;
3569 break;
3571 default:
3572 break;
3574 return FALSE;
3577 /* Like spu_legitimate_address_p, except with named addresses. */
3578 static bool
3579 spu_addr_space_legitimate_address_p (machine_mode mode, rtx x,
3580 bool reg_ok_strict, addr_space_t as)
3582 if (as == ADDR_SPACE_EA)
3583 return (REG_P (x) && (GET_MODE (x) == EAmode));
3585 else if (as != ADDR_SPACE_GENERIC)
3586 gcc_unreachable ();
3588 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3591 /* When the address is reg + const_int, force the const_int into a
3592 register. */
3593 static rtx
3594 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3595 machine_mode mode ATTRIBUTE_UNUSED)
3597 rtx op0, op1;
3598 /* Make sure both operands are registers. */
3599 if (GET_CODE (x) == PLUS)
3601 op0 = XEXP (x, 0);
3602 op1 = XEXP (x, 1);
3603 if (ALIGNED_SYMBOL_REF_P (op0))
3605 op0 = force_reg (Pmode, op0);
3606 mark_reg_pointer (op0, 128);
3608 else if (GET_CODE (op0) != REG)
3609 op0 = force_reg (Pmode, op0);
3610 if (ALIGNED_SYMBOL_REF_P (op1))
3612 op1 = force_reg (Pmode, op1);
3613 mark_reg_pointer (op1, 128);
3615 else if (GET_CODE (op1) != REG)
3616 op1 = force_reg (Pmode, op1);
3617 x = gen_rtx_PLUS (Pmode, op0, op1);
3619 return x;
3622 /* Like spu_legitimate_address, except with named address support. */
3623 static rtx
3624 spu_addr_space_legitimize_address (rtx x, rtx oldx, machine_mode mode,
3625 addr_space_t as)
3627 if (as != ADDR_SPACE_GENERIC)
3628 return x;
3630 return spu_legitimize_address (x, oldx, mode);
3633 /* Reload reg + const_int for out-of-range displacements. */
3635 spu_legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
3636 int opnum, int type)
3638 bool removed_and = false;
3640 if (GET_CODE (ad) == AND
3641 && CONST_INT_P (XEXP (ad, 1))
3642 && INTVAL (XEXP (ad, 1)) == (HOST_WIDE_INT) - 16)
3644 ad = XEXP (ad, 0);
3645 removed_and = true;
3648 if (GET_CODE (ad) == PLUS
3649 && REG_P (XEXP (ad, 0))
3650 && CONST_INT_P (XEXP (ad, 1))
3651 && !(INTVAL (XEXP (ad, 1)) >= -0x2000
3652 && INTVAL (XEXP (ad, 1)) <= 0x1fff))
3654 /* Unshare the sum. */
3655 ad = copy_rtx (ad);
3657 /* Reload the displacement. */
3658 push_reload (XEXP (ad, 1), NULL_RTX, &XEXP (ad, 1), NULL,
3659 BASE_REG_CLASS, GET_MODE (ad), VOIDmode, 0, 0,
3660 opnum, (enum reload_type) type);
3662 /* Add back AND for alignment if we stripped it. */
3663 if (removed_and)
3664 ad = gen_rtx_AND (GET_MODE (ad), ad, GEN_INT (-16));
3666 return ad;
3669 return NULL_RTX;
3672 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3673 struct attribute_spec.handler. */
3674 static tree
3675 spu_handle_fndecl_attribute (tree * node,
3676 tree name,
3677 tree args ATTRIBUTE_UNUSED,
3678 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3680 if (TREE_CODE (*node) != FUNCTION_DECL)
3682 warning (0, "%qE attribute only applies to functions",
3683 name);
3684 *no_add_attrs = true;
3687 return NULL_TREE;
3690 /* Handle the "vector" attribute. */
3691 static tree
3692 spu_handle_vector_attribute (tree * node, tree name,
3693 tree args ATTRIBUTE_UNUSED,
3694 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3696 tree type = *node, result = NULL_TREE;
3697 machine_mode mode;
3698 int unsigned_p;
3700 while (POINTER_TYPE_P (type)
3701 || TREE_CODE (type) == FUNCTION_TYPE
3702 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3703 type = TREE_TYPE (type);
3705 mode = TYPE_MODE (type);
3707 unsigned_p = TYPE_UNSIGNED (type);
3708 switch (mode)
3710 case E_DImode:
3711 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3712 break;
3713 case E_SImode:
3714 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3715 break;
3716 case E_HImode:
3717 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3718 break;
3719 case E_QImode:
3720 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3721 break;
3722 case E_SFmode:
3723 result = V4SF_type_node;
3724 break;
3725 case E_DFmode:
3726 result = V2DF_type_node;
3727 break;
3728 default:
3729 break;
3732 /* Propagate qualifiers attached to the element type
3733 onto the vector type. */
3734 if (result && result != type && TYPE_QUALS (type))
3735 result = build_qualified_type (result, TYPE_QUALS (type));
3737 *no_add_attrs = true; /* No need to hang on to the attribute. */
3739 if (!result)
3740 warning (0, "%qE attribute ignored", name);
3741 else
3742 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3744 return NULL_TREE;
3747 /* Return nonzero if FUNC is a naked function. */
3748 static int
3749 spu_naked_function_p (tree func)
3751 tree a;
3753 if (TREE_CODE (func) != FUNCTION_DECL)
3754 abort ();
3756 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3757 return a != NULL_TREE;
3761 spu_initial_elimination_offset (int from, int to)
3763 int saved_regs_size = spu_saved_regs_size ();
3764 int sp_offset = 0;
3765 if (!crtl->is_leaf || crtl->outgoing_args_size
3766 || get_frame_size () || saved_regs_size)
3767 sp_offset = STACK_POINTER_OFFSET;
3768 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3769 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3770 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3771 return get_frame_size ();
3772 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3773 return sp_offset + crtl->outgoing_args_size
3774 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3775 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3776 return get_frame_size () + saved_regs_size + sp_offset;
3777 else
3778 gcc_unreachable ();
3782 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3784 machine_mode mode = TYPE_MODE (type);
3785 int byte_size = ((mode == BLKmode)
3786 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3788 /* Make sure small structs are left justified in a register. */
3789 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3790 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3792 machine_mode smode;
3793 rtvec v;
3794 int i;
3795 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3796 int n = byte_size / UNITS_PER_WORD;
3797 v = rtvec_alloc (nregs);
3798 for (i = 0; i < n; i++)
3800 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3801 gen_rtx_REG (TImode,
3802 FIRST_RETURN_REGNUM
3803 + i),
3804 GEN_INT (UNITS_PER_WORD * i));
3805 byte_size -= UNITS_PER_WORD;
3808 if (n < nregs)
3810 if (byte_size < 4)
3811 byte_size = 4;
3812 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3813 RTVEC_ELT (v, n) =
3814 gen_rtx_EXPR_LIST (VOIDmode,
3815 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3816 GEN_INT (UNITS_PER_WORD * n));
3818 return gen_rtx_PARALLEL (mode, v);
3820 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3823 static rtx
3824 spu_function_arg (cumulative_args_t cum_v,
3825 machine_mode mode,
3826 const_tree type, bool named ATTRIBUTE_UNUSED)
3828 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3829 int byte_size;
3831 if (*cum >= MAX_REGISTER_ARGS)
3832 return 0;
3834 byte_size = ((mode == BLKmode)
3835 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3837 /* The ABI does not allow parameters to be passed partially in
3838 reg and partially in stack. */
3839 if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3840 return 0;
3842 /* Make sure small structs are left justified in a register. */
3843 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3844 && byte_size < UNITS_PER_WORD && byte_size > 0)
3846 machine_mode smode;
3847 rtx gr_reg;
3848 if (byte_size < 4)
3849 byte_size = 4;
3850 smode = smallest_int_mode_for_size (byte_size * BITS_PER_UNIT);
3851 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3852 gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
3853 const0_rtx);
3854 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3856 else
3857 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
3860 static void
3861 spu_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
3862 const_tree type, bool named ATTRIBUTE_UNUSED)
3864 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
3866 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
3868 : mode == BLKmode
3869 ? ((int_size_in_bytes (type) + 15) / 16)
3870 : mode == VOIDmode
3872 : spu_hard_regno_nregs (FIRST_ARG_REGNUM, mode));
3875 /* Implement TARGET_FUNCTION_ARG_OFFSET. The SPU ABI wants 32/64-bit
3876 types at offset 0 in the quad-word on the stack. 8/16-bit types
3877 should be at offsets 3/2 respectively. */
3879 static HOST_WIDE_INT
3880 spu_function_arg_offset (machine_mode mode, const_tree type)
3882 if (type && INTEGRAL_TYPE_P (type) && GET_MODE_SIZE (mode) < 4)
3883 return 4 - GET_MODE_SIZE (mode);
3884 return 0;
3887 /* Implement TARGET_FUNCTION_ARG_PADDING. */
3889 static pad_direction
3890 spu_function_arg_padding (machine_mode, const_tree)
3892 return PAD_UPWARD;
3895 /* Variable sized types are passed by reference. */
3896 static bool
3897 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
3898 machine_mode mode ATTRIBUTE_UNUSED,
3899 const_tree type, bool named ATTRIBUTE_UNUSED)
3901 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3905 /* Var args. */
3907 /* Create and return the va_list datatype.
3909 On SPU, va_list is an array type equivalent to
3911 typedef struct __va_list_tag
3913 void *__args __attribute__((__aligned(16)));
3914 void *__skip __attribute__((__aligned(16)));
3916 } va_list[1];
3918 where __args points to the arg that will be returned by the next
3919 va_arg(), and __skip points to the previous stack frame such that
3920 when __args == __skip we should advance __args by 32 bytes. */
3921 static tree
3922 spu_build_builtin_va_list (void)
3924 tree f_args, f_skip, record, type_decl;
3925 bool owp;
3927 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3929 type_decl =
3930 build_decl (BUILTINS_LOCATION,
3931 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3933 f_args = build_decl (BUILTINS_LOCATION,
3934 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3935 f_skip = build_decl (BUILTINS_LOCATION,
3936 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3938 DECL_FIELD_CONTEXT (f_args) = record;
3939 SET_DECL_ALIGN (f_args, 128);
3940 DECL_USER_ALIGN (f_args) = 1;
3942 DECL_FIELD_CONTEXT (f_skip) = record;
3943 SET_DECL_ALIGN (f_skip, 128);
3944 DECL_USER_ALIGN (f_skip) = 1;
3946 TYPE_STUB_DECL (record) = type_decl;
3947 TYPE_NAME (record) = type_decl;
3948 TYPE_FIELDS (record) = f_args;
3949 DECL_CHAIN (f_args) = f_skip;
3951 /* We know this is being padded and we want it too. It is an internal
3952 type so hide the warnings from the user. */
3953 owp = warn_padded;
3954 warn_padded = false;
3956 layout_type (record);
3958 warn_padded = owp;
3960 /* The correct type is an array type of one element. */
3961 return build_array_type (record, build_index_type (size_zero_node));
3964 /* Implement va_start by filling the va_list structure VALIST.
3965 NEXTARG points to the first anonymous stack argument.
3967 The following global variables are used to initialize
3968 the va_list structure:
3970 crtl->args.info;
3971 the CUMULATIVE_ARGS for this function
3973 crtl->args.arg_offset_rtx:
3974 holds the offset of the first anonymous stack argument
3975 (relative to the virtual arg pointer). */
3977 static void
3978 spu_va_start (tree valist, rtx nextarg)
3980 tree f_args, f_skip;
3981 tree args, skip, t;
3983 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3984 f_skip = DECL_CHAIN (f_args);
3986 valist = build_simple_mem_ref (valist);
3987 args =
3988 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3989 skip =
3990 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3992 /* Find the __args area. */
3993 t = make_tree (TREE_TYPE (args), nextarg);
3994 if (crtl->args.pretend_args_size > 0)
3995 t = fold_build_pointer_plus_hwi (t, -STACK_POINTER_OFFSET);
3996 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
3997 TREE_SIDE_EFFECTS (t) = 1;
3998 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4000 /* Find the __skip area. */
4001 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4002 t = fold_build_pointer_plus_hwi (t, (crtl->args.pretend_args_size
4003 - STACK_POINTER_OFFSET));
4004 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4005 TREE_SIDE_EFFECTS (t) = 1;
4006 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4009 /* Gimplify va_arg by updating the va_list structure
4010 VALIST as required to retrieve an argument of type
4011 TYPE, and returning that argument.
4013 ret = va_arg(VALIST, TYPE);
4015 generates code equivalent to:
4017 paddedsize = (sizeof(TYPE) + 15) & -16;
4018 if (VALIST.__args + paddedsize > VALIST.__skip
4019 && VALIST.__args <= VALIST.__skip)
4020 addr = VALIST.__skip + 32;
4021 else
4022 addr = VALIST.__args;
4023 VALIST.__args = addr + paddedsize;
4024 ret = *(TYPE *)addr;
4026 static tree
4027 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4028 gimple_seq * post_p ATTRIBUTE_UNUSED)
4030 tree f_args, f_skip;
4031 tree args, skip;
4032 HOST_WIDE_INT size, rsize;
4033 tree addr, tmp;
4034 bool pass_by_reference_p;
4036 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4037 f_skip = DECL_CHAIN (f_args);
4039 args =
4040 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4041 skip =
4042 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4044 addr = create_tmp_var (ptr_type_node, "va_arg");
4046 /* if an object is dynamically sized, a pointer to it is passed
4047 instead of the object itself. */
4048 pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
4049 false);
4050 if (pass_by_reference_p)
4051 type = build_pointer_type (type);
4052 size = int_size_in_bytes (type);
4053 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4055 /* build conditional expression to calculate addr. The expression
4056 will be gimplified later. */
4057 tmp = fold_build_pointer_plus_hwi (unshare_expr (args), rsize);
4058 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4059 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4060 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4061 unshare_expr (skip)));
4063 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4064 fold_build_pointer_plus_hwi (unshare_expr (skip), 32),
4065 unshare_expr (args));
4067 gimplify_assign (addr, tmp, pre_p);
4069 /* update VALIST.__args */
4070 tmp = fold_build_pointer_plus_hwi (addr, rsize);
4071 gimplify_assign (unshare_expr (args), tmp, pre_p);
4073 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4074 addr);
4076 if (pass_by_reference_p)
4077 addr = build_va_arg_indirect_ref (addr);
4079 return build_va_arg_indirect_ref (addr);
4082 /* Save parameter registers starting with the register that corresponds
4083 to the first unnamed parameters. If the first unnamed parameter is
4084 in the stack then save no registers. Set pretend_args_size to the
4085 amount of space needed to save the registers. */
4086 static void
4087 spu_setup_incoming_varargs (cumulative_args_t cum, machine_mode mode,
4088 tree type, int *pretend_size, int no_rtl)
4090 if (!no_rtl)
4092 rtx tmp;
4093 int regno;
4094 int offset;
4095 int ncum = *get_cumulative_args (cum);
4097 /* cum currently points to the last named argument, we want to
4098 start at the next argument. */
4099 spu_function_arg_advance (pack_cumulative_args (&ncum), mode, type, true);
4101 offset = -STACK_POINTER_OFFSET;
4102 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4104 tmp = gen_frame_mem (V4SImode,
4105 plus_constant (Pmode, virtual_incoming_args_rtx,
4106 offset));
4107 emit_move_insn (tmp,
4108 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4109 offset += 16;
4111 *pretend_size = offset + STACK_POINTER_OFFSET;
4115 static void
4116 spu_conditional_register_usage (void)
4118 if (flag_pic)
4120 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4121 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4125 /* This is called any time we inspect the alignment of a register for
4126 addresses. */
4127 static int
4128 reg_aligned_for_addr (rtx x)
4130 int regno =
4131 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4132 return REGNO_POINTER_ALIGN (regno) >= 128;
4135 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4136 into its SYMBOL_REF_FLAGS. */
4137 static void
4138 spu_encode_section_info (tree decl, rtx rtl, int first)
4140 default_encode_section_info (decl, rtl, first);
4142 /* If a variable has a forced alignment to < 16 bytes, mark it with
4143 SYMBOL_FLAG_ALIGN1. */
4144 if (TREE_CODE (decl) == VAR_DECL
4145 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4146 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4149 /* Return TRUE if we are certain the mem refers to a complete object
4150 which is both 16-byte aligned and padded to a 16-byte boundary. This
4151 would make it safe to store with a single instruction.
4152 We guarantee the alignment and padding for static objects by aligning
4153 all of them to 16-bytes. (DATA_ALIGNMENT and TARGET_CONSTANT_ALIGNMENT.)
4154 FIXME: We currently cannot guarantee this for objects on the stack
4155 because assign_parm_setup_stack calls assign_stack_local with the
4156 alignment of the parameter mode and in that case the alignment never
4157 gets adjusted by LOCAL_ALIGNMENT. */
4158 static int
4159 store_with_one_insn_p (rtx mem)
4161 machine_mode mode = GET_MODE (mem);
4162 rtx addr = XEXP (mem, 0);
4163 if (mode == BLKmode)
4164 return 0;
4165 if (GET_MODE_SIZE (mode) >= 16)
4166 return 1;
4167 /* Only static objects. */
4168 if (GET_CODE (addr) == SYMBOL_REF)
4170 /* We use the associated declaration to make sure the access is
4171 referring to the whole object.
4172 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4173 if it is necessary. Will there be cases where one exists, and
4174 the other does not? Will there be cases where both exist, but
4175 have different types? */
4176 tree decl = MEM_EXPR (mem);
4177 if (decl
4178 && TREE_CODE (decl) == VAR_DECL
4179 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4180 return 1;
4181 decl = SYMBOL_REF_DECL (addr);
4182 if (decl
4183 && TREE_CODE (decl) == VAR_DECL
4184 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4185 return 1;
4187 return 0;
4190 /* Return 1 when the address is not valid for a simple load and store as
4191 required by the '_mov*' patterns. We could make this less strict
4192 for loads, but we prefer mem's to look the same so they are more
4193 likely to be merged. */
4194 static int
4195 address_needs_split (rtx mem)
4197 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4198 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4199 || !(store_with_one_insn_p (mem)
4200 || mem_is_padded_component_ref (mem))))
4201 return 1;
4203 return 0;
4206 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4207 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4208 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4210 /* MEM is known to be an __ea qualified memory access. Emit a call to
4211 fetch the ppu memory to local store, and return its address in local
4212 store. */
4214 static void
4215 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4217 if (is_store)
4219 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4220 if (!cache_fetch_dirty)
4221 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4222 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4223 ea_addr, EAmode, ndirty, SImode);
4225 else
4227 if (!cache_fetch)
4228 cache_fetch = init_one_libfunc ("__cache_fetch");
4229 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4230 ea_addr, EAmode);
4234 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4235 dirty bit marking, inline.
4237 The cache control data structure is an array of
4239 struct __cache_tag_array
4241 unsigned int tag_lo[4];
4242 unsigned int tag_hi[4];
4243 void *data_pointer[4];
4244 int reserved[4];
4245 vector unsigned short dirty_bits[4];
4246 } */
4248 static void
4249 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4251 rtx ea_addr_si;
4252 HOST_WIDE_INT v;
4253 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4254 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4255 rtx index_mask = gen_reg_rtx (SImode);
4256 rtx tag_arr = gen_reg_rtx (Pmode);
4257 rtx splat_mask = gen_reg_rtx (TImode);
4258 rtx splat = gen_reg_rtx (V4SImode);
4259 rtx splat_hi = NULL_RTX;
4260 rtx tag_index = gen_reg_rtx (Pmode);
4261 rtx block_off = gen_reg_rtx (SImode);
4262 rtx tag_addr = gen_reg_rtx (Pmode);
4263 rtx tag = gen_reg_rtx (V4SImode);
4264 rtx cache_tag = gen_reg_rtx (V4SImode);
4265 rtx cache_tag_hi = NULL_RTX;
4266 rtx cache_ptrs = gen_reg_rtx (TImode);
4267 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4268 rtx tag_equal = gen_reg_rtx (V4SImode);
4269 rtx tag_equal_hi = NULL_RTX;
4270 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4271 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4272 rtx eq_index = gen_reg_rtx (SImode);
4273 rtx bcomp, hit_label, hit_ref, cont_label;
4274 rtx_insn *insn;
4276 if (spu_ea_model != 32)
4278 splat_hi = gen_reg_rtx (V4SImode);
4279 cache_tag_hi = gen_reg_rtx (V4SImode);
4280 tag_equal_hi = gen_reg_rtx (V4SImode);
4283 emit_move_insn (index_mask, plus_constant (Pmode, tag_size_sym, -128));
4284 emit_move_insn (tag_arr, tag_arr_sym);
4285 v = 0x0001020300010203LL;
4286 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4287 ea_addr_si = ea_addr;
4288 if (spu_ea_model != 32)
4289 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4291 /* tag_index = ea_addr & (tag_array_size - 128) */
4292 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4294 /* splat ea_addr to all 4 slots. */
4295 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4296 /* Similarly for high 32 bits of ea_addr. */
4297 if (spu_ea_model != 32)
4298 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4300 /* block_off = ea_addr & 127 */
4301 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4303 /* tag_addr = tag_arr + tag_index */
4304 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4306 /* Read cache tags. */
4307 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4308 if (spu_ea_model != 32)
4309 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4310 plus_constant (Pmode,
4311 tag_addr, 16)));
4313 /* tag = ea_addr & -128 */
4314 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4316 /* Read all four cache data pointers. */
4317 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4318 plus_constant (Pmode,
4319 tag_addr, 32)));
4321 /* Compare tags. */
4322 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4323 if (spu_ea_model != 32)
4325 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4326 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4329 /* At most one of the tags compare equal, so tag_equal has one
4330 32-bit slot set to all 1's, with the other slots all zero.
4331 gbb picks off low bit from each byte in the 128-bit registers,
4332 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4333 we have a hit. */
4334 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4335 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4337 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4338 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4340 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4341 (rotating eq_index mod 16 bytes). */
4342 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4343 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4345 /* Add block offset to form final data address. */
4346 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4348 /* Check that we did hit. */
4349 hit_label = gen_label_rtx ();
4350 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4351 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4352 insn = emit_jump_insn (gen_rtx_SET (pc_rtx,
4353 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4354 hit_ref, pc_rtx)));
4355 /* Say that this branch is very likely to happen. */
4356 add_reg_br_prob_note (insn, profile_probability::very_likely ());
4358 ea_load_store (mem, is_store, ea_addr, data_addr);
4359 cont_label = gen_label_rtx ();
4360 emit_jump_insn (gen_jump (cont_label));
4361 emit_barrier ();
4363 emit_label (hit_label);
4365 if (is_store)
4367 HOST_WIDE_INT v_hi;
4368 rtx dirty_bits = gen_reg_rtx (TImode);
4369 rtx dirty_off = gen_reg_rtx (SImode);
4370 rtx dirty_128 = gen_reg_rtx (TImode);
4371 rtx neg_block_off = gen_reg_rtx (SImode);
4373 /* Set up mask with one dirty bit per byte of the mem we are
4374 writing, starting from top bit. */
4375 v_hi = v = -1;
4376 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4377 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4379 v_hi = v;
4380 v = 0;
4382 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4384 /* Form index into cache dirty_bits. eq_index is one of
4385 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4386 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4387 offset to each of the four dirty_bits elements. */
4388 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4390 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4392 /* Rotate bit mask to proper bit. */
4393 emit_insn (gen_negsi2 (neg_block_off, block_off));
4394 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4395 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4397 /* Or in the new dirty bits. */
4398 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4400 /* Store. */
4401 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4404 emit_label (cont_label);
4407 static rtx
4408 expand_ea_mem (rtx mem, bool is_store)
4410 rtx ea_addr;
4411 rtx data_addr = gen_reg_rtx (Pmode);
4412 rtx new_mem;
4414 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4415 if (optimize_size || optimize == 0)
4416 ea_load_store (mem, is_store, ea_addr, data_addr);
4417 else
4418 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4420 if (ea_alias_set == -1)
4421 ea_alias_set = new_alias_set ();
4423 /* We generate a new MEM RTX to refer to the copy of the data
4424 in the cache. We do not copy memory attributes (except the
4425 alignment) from the original MEM, as they may no longer apply
4426 to the cache copy. */
4427 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4428 set_mem_alias_set (new_mem, ea_alias_set);
4429 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4431 return new_mem;
4435 spu_expand_mov (rtx * ops, machine_mode mode)
4437 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4439 /* Perform the move in the destination SUBREG's inner mode. */
4440 ops[0] = SUBREG_REG (ops[0]);
4441 mode = GET_MODE (ops[0]);
4442 ops[1] = gen_lowpart_common (mode, ops[1]);
4443 gcc_assert (ops[1]);
4446 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4448 rtx from = SUBREG_REG (ops[1]);
4449 scalar_int_mode imode = int_mode_for_mode (GET_MODE (from)).require ();
4451 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4452 && GET_MODE_CLASS (imode) == MODE_INT
4453 && subreg_lowpart_p (ops[1]));
4455 if (GET_MODE_SIZE (imode) < 4)
4456 imode = SImode;
4457 if (imode != GET_MODE (from))
4458 from = gen_rtx_SUBREG (imode, from, 0);
4460 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4462 enum insn_code icode = convert_optab_handler (trunc_optab,
4463 mode, imode);
4464 emit_insn (GEN_FCN (icode) (ops[0], from));
4466 else
4467 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4468 return 1;
4471 /* At least one of the operands needs to be a register. */
4472 if ((reload_in_progress | reload_completed) == 0
4473 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4475 rtx temp = force_reg (mode, ops[1]);
4476 emit_move_insn (ops[0], temp);
4477 return 1;
4479 if (reload_in_progress || reload_completed)
4481 if (CONSTANT_P (ops[1]))
4482 return spu_split_immediate (ops);
4483 return 0;
4486 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4487 extend them. */
4488 if (GET_CODE (ops[1]) == CONST_INT)
4490 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4491 if (val != INTVAL (ops[1]))
4493 emit_move_insn (ops[0], GEN_INT (val));
4494 return 1;
4497 if (MEM_P (ops[0]))
4499 if (MEM_ADDR_SPACE (ops[0]))
4500 ops[0] = expand_ea_mem (ops[0], true);
4501 return spu_split_store (ops);
4503 if (MEM_P (ops[1]))
4505 if (MEM_ADDR_SPACE (ops[1]))
4506 ops[1] = expand_ea_mem (ops[1], false);
4507 return spu_split_load (ops);
4510 return 0;
4513 static void
4514 spu_convert_move (rtx dst, rtx src)
4516 machine_mode mode = GET_MODE (dst);
4517 machine_mode int_mode = int_mode_for_mode (mode).require ();
4518 rtx reg;
4519 gcc_assert (GET_MODE (src) == TImode);
4520 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4521 emit_insn (gen_rtx_SET (reg,
4522 gen_rtx_TRUNCATE (int_mode,
4523 gen_rtx_LSHIFTRT (TImode, src,
4524 GEN_INT (int_mode == DImode ? 64 : 96)))));
4525 if (int_mode != mode)
4527 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4528 emit_move_insn (dst, reg);
4532 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4533 the address from SRC and SRC+16. Return a REG or CONST_INT that
4534 specifies how many bytes to rotate the loaded registers, plus any
4535 extra from EXTRA_ROTQBY. The address and rotate amounts are
4536 normalized to improve merging of loads and rotate computations. */
4537 static rtx
4538 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4540 rtx addr = XEXP (src, 0);
4541 rtx p0, p1, rot, addr0, addr1;
4542 int rot_amt;
4544 rot = 0;
4545 rot_amt = 0;
4547 if (MEM_ALIGN (src) >= 128)
4548 /* Address is already aligned; simply perform a TImode load. */ ;
4549 else if (GET_CODE (addr) == PLUS)
4551 /* 8 cases:
4552 aligned reg + aligned reg => lqx
4553 aligned reg + unaligned reg => lqx, rotqby
4554 aligned reg + aligned const => lqd
4555 aligned reg + unaligned const => lqd, rotqbyi
4556 unaligned reg + aligned reg => lqx, rotqby
4557 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4558 unaligned reg + aligned const => lqd, rotqby
4559 unaligned reg + unaligned const -> not allowed by legitimate address
4561 p0 = XEXP (addr, 0);
4562 p1 = XEXP (addr, 1);
4563 if (!reg_aligned_for_addr (p0))
4565 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4567 rot = gen_reg_rtx (SImode);
4568 emit_insn (gen_addsi3 (rot, p0, p1));
4570 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4572 if (INTVAL (p1) > 0
4573 && REG_POINTER (p0)
4574 && INTVAL (p1) * BITS_PER_UNIT
4575 < REGNO_POINTER_ALIGN (REGNO (p0)))
4577 rot = gen_reg_rtx (SImode);
4578 emit_insn (gen_addsi3 (rot, p0, p1));
4579 addr = p0;
4581 else
4583 rtx x = gen_reg_rtx (SImode);
4584 emit_move_insn (x, p1);
4585 if (!spu_arith_operand (p1, SImode))
4586 p1 = x;
4587 rot = gen_reg_rtx (SImode);
4588 emit_insn (gen_addsi3 (rot, p0, p1));
4589 addr = gen_rtx_PLUS (Pmode, p0, x);
4592 else
4593 rot = p0;
4595 else
4597 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4599 rot_amt = INTVAL (p1) & 15;
4600 if (INTVAL (p1) & -16)
4602 p1 = GEN_INT (INTVAL (p1) & -16);
4603 addr = gen_rtx_PLUS (SImode, p0, p1);
4605 else
4606 addr = p0;
4608 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4609 rot = p1;
4612 else if (REG_P (addr))
4614 if (!reg_aligned_for_addr (addr))
4615 rot = addr;
4617 else if (GET_CODE (addr) == CONST)
4619 if (GET_CODE (XEXP (addr, 0)) == PLUS
4620 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4621 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4623 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4624 if (rot_amt & -16)
4625 addr = gen_rtx_CONST (Pmode,
4626 gen_rtx_PLUS (Pmode,
4627 XEXP (XEXP (addr, 0), 0),
4628 GEN_INT (rot_amt & -16)));
4629 else
4630 addr = XEXP (XEXP (addr, 0), 0);
4632 else
4634 rot = gen_reg_rtx (Pmode);
4635 emit_move_insn (rot, addr);
4638 else if (GET_CODE (addr) == CONST_INT)
4640 rot_amt = INTVAL (addr);
4641 addr = GEN_INT (rot_amt & -16);
4643 else if (!ALIGNED_SYMBOL_REF_P (addr))
4645 rot = gen_reg_rtx (Pmode);
4646 emit_move_insn (rot, addr);
4649 rot_amt += extra_rotby;
4651 rot_amt &= 15;
4653 if (rot && rot_amt)
4655 rtx x = gen_reg_rtx (SImode);
4656 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4657 rot = x;
4658 rot_amt = 0;
4660 if (!rot && rot_amt)
4661 rot = GEN_INT (rot_amt);
4663 addr0 = copy_rtx (addr);
4664 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4665 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4667 if (dst1)
4669 addr1 = plus_constant (SImode, copy_rtx (addr), 16);
4670 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4671 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4674 return rot;
4678 spu_split_load (rtx * ops)
4680 machine_mode mode = GET_MODE (ops[0]);
4681 rtx addr, load, rot;
4682 int rot_amt;
4684 if (GET_MODE_SIZE (mode) >= 16)
4685 return 0;
4687 addr = XEXP (ops[1], 0);
4688 gcc_assert (GET_CODE (addr) != AND);
4690 if (!address_needs_split (ops[1]))
4692 ops[1] = change_address (ops[1], TImode, addr);
4693 load = gen_reg_rtx (TImode);
4694 emit_insn (gen__movti (load, ops[1]));
4695 spu_convert_move (ops[0], load);
4696 return 1;
4699 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4701 load = gen_reg_rtx (TImode);
4702 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4704 if (rot)
4705 emit_insn (gen_rotqby_ti (load, load, rot));
4707 spu_convert_move (ops[0], load);
4708 return 1;
4712 spu_split_store (rtx * ops)
4714 machine_mode mode = GET_MODE (ops[0]);
4715 rtx reg;
4716 rtx addr, p0, p1, p1_lo, smem;
4717 int aform;
4718 int scalar;
4720 if (GET_MODE_SIZE (mode) >= 16)
4721 return 0;
4723 addr = XEXP (ops[0], 0);
4724 gcc_assert (GET_CODE (addr) != AND);
4726 if (!address_needs_split (ops[0]))
4728 reg = gen_reg_rtx (TImode);
4729 emit_insn (gen_spu_convert (reg, ops[1]));
4730 ops[0] = change_address (ops[0], TImode, addr);
4731 emit_move_insn (ops[0], reg);
4732 return 1;
4735 if (GET_CODE (addr) == PLUS)
4737 /* 8 cases:
4738 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4739 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4740 aligned reg + aligned const => lqd, c?d, shuf, stqx
4741 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4742 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4743 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4744 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4745 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4747 aform = 0;
4748 p0 = XEXP (addr, 0);
4749 p1 = p1_lo = XEXP (addr, 1);
4750 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4752 p1_lo = GEN_INT (INTVAL (p1) & 15);
4753 if (reg_aligned_for_addr (p0))
4755 p1 = GEN_INT (INTVAL (p1) & -16);
4756 if (p1 == const0_rtx)
4757 addr = p0;
4758 else
4759 addr = gen_rtx_PLUS (SImode, p0, p1);
4761 else
4763 rtx x = gen_reg_rtx (SImode);
4764 emit_move_insn (x, p1);
4765 addr = gen_rtx_PLUS (SImode, p0, x);
4769 else if (REG_P (addr))
4771 aform = 0;
4772 p0 = addr;
4773 p1 = p1_lo = const0_rtx;
4775 else
4777 aform = 1;
4778 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4779 p1 = 0; /* aform doesn't use p1 */
4780 p1_lo = addr;
4781 if (ALIGNED_SYMBOL_REF_P (addr))
4782 p1_lo = const0_rtx;
4783 else if (GET_CODE (addr) == CONST
4784 && GET_CODE (XEXP (addr, 0)) == PLUS
4785 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4786 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4788 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4789 if ((v & -16) != 0)
4790 addr = gen_rtx_CONST (Pmode,
4791 gen_rtx_PLUS (Pmode,
4792 XEXP (XEXP (addr, 0), 0),
4793 GEN_INT (v & -16)));
4794 else
4795 addr = XEXP (XEXP (addr, 0), 0);
4796 p1_lo = GEN_INT (v & 15);
4798 else if (GET_CODE (addr) == CONST_INT)
4800 p1_lo = GEN_INT (INTVAL (addr) & 15);
4801 addr = GEN_INT (INTVAL (addr) & -16);
4803 else
4805 p1_lo = gen_reg_rtx (SImode);
4806 emit_move_insn (p1_lo, addr);
4810 gcc_assert (aform == 0 || aform == 1);
4811 reg = gen_reg_rtx (TImode);
4813 scalar = store_with_one_insn_p (ops[0]);
4814 if (!scalar)
4816 /* We could copy the flags from the ops[0] MEM to mem here,
4817 We don't because we want this load to be optimized away if
4818 possible, and copying the flags will prevent that in certain
4819 cases, e.g. consider the volatile flag. */
4821 rtx pat = gen_reg_rtx (TImode);
4822 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4823 set_mem_alias_set (lmem, 0);
4824 emit_insn (gen_movti (reg, lmem));
4826 if (!p0 || reg_aligned_for_addr (p0))
4827 p0 = stack_pointer_rtx;
4828 if (!p1_lo)
4829 p1_lo = const0_rtx;
4831 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4832 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4834 else
4836 if (GET_CODE (ops[1]) == REG)
4837 emit_insn (gen_spu_convert (reg, ops[1]));
4838 else if (GET_CODE (ops[1]) == SUBREG)
4839 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4840 else
4841 abort ();
4844 if (GET_MODE_SIZE (mode) < 4 && scalar)
4845 emit_insn (gen_ashlti3
4846 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4848 smem = change_address (ops[0], TImode, copy_rtx (addr));
4849 /* We can't use the previous alias set because the memory has changed
4850 size and can potentially overlap objects of other types. */
4851 set_mem_alias_set (smem, 0);
4853 emit_insn (gen_movti (smem, reg));
4854 return 1;
4857 /* Return TRUE if X is MEM which is a struct member reference
4858 and the member can safely be loaded and stored with a single
4859 instruction because it is padded. */
4860 static int
4861 mem_is_padded_component_ref (rtx x)
4863 tree t = MEM_EXPR (x);
4864 tree r;
4865 if (!t || TREE_CODE (t) != COMPONENT_REF)
4866 return 0;
4867 t = TREE_OPERAND (t, 1);
4868 if (!t || TREE_CODE (t) != FIELD_DECL
4869 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4870 return 0;
4871 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4872 r = DECL_FIELD_CONTEXT (t);
4873 if (!r || TREE_CODE (r) != RECORD_TYPE)
4874 return 0;
4875 /* Make sure they are the same mode */
4876 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4877 return 0;
4878 /* If there are no following fields then the field alignment assures
4879 the structure is padded to the alignment which means this field is
4880 padded too. */
4881 if (TREE_CHAIN (t) == 0)
4882 return 1;
4883 /* If the following field is also aligned then this field will be
4884 padded. */
4885 t = TREE_CHAIN (t);
4886 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4887 return 1;
4888 return 0;
4891 /* Parse the -mfixed-range= option string. */
4892 static void
4893 fix_range (const char *const_str)
4895 int i, first, last;
4896 char *str, *dash, *comma;
4898 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4899 REG2 are either register names or register numbers. The effect
4900 of this option is to mark the registers in the range from REG1 to
4901 REG2 as ``fixed'' so they won't be used by the compiler. */
4903 i = strlen (const_str);
4904 str = (char *) alloca (i + 1);
4905 memcpy (str, const_str, i + 1);
4907 while (1)
4909 dash = strchr (str, '-');
4910 if (!dash)
4912 warning (0, "value of -mfixed-range must have form REG1-REG2");
4913 return;
4915 *dash = '\0';
4916 comma = strchr (dash + 1, ',');
4917 if (comma)
4918 *comma = '\0';
4920 first = decode_reg_name (str);
4921 if (first < 0)
4923 warning (0, "unknown register name: %s", str);
4924 return;
4927 last = decode_reg_name (dash + 1);
4928 if (last < 0)
4930 warning (0, "unknown register name: %s", dash + 1);
4931 return;
4934 *dash = '-';
4936 if (first > last)
4938 warning (0, "%s-%s is an empty range", str, dash + 1);
4939 return;
4942 for (i = first; i <= last; ++i)
4943 fixed_regs[i] = call_used_regs[i] = 1;
4945 if (!comma)
4946 break;
4948 *comma = ',';
4949 str = comma + 1;
4953 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4954 can be generated using the fsmbi instruction. */
4956 fsmbi_const_p (rtx x)
4958 if (CONSTANT_P (x))
4960 /* We can always choose TImode for CONST_INT because the high bits
4961 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4962 enum immediate_class c = classify_immediate (x, TImode);
4963 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4965 return 0;
4968 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4969 can be generated using the cbd, chd, cwd or cdd instruction. */
4971 cpat_const_p (rtx x, machine_mode mode)
4973 if (CONSTANT_P (x))
4975 enum immediate_class c = classify_immediate (x, mode);
4976 return c == IC_CPAT;
4978 return 0;
4982 gen_cpat_const (rtx * ops)
4984 unsigned char dst[16];
4985 int i, offset, shift, isize;
4986 if (GET_CODE (ops[3]) != CONST_INT
4987 || GET_CODE (ops[2]) != CONST_INT
4988 || (GET_CODE (ops[1]) != CONST_INT
4989 && GET_CODE (ops[1]) != REG))
4990 return 0;
4991 if (GET_CODE (ops[1]) == REG
4992 && (!REG_POINTER (ops[1])
4993 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4994 return 0;
4996 for (i = 0; i < 16; i++)
4997 dst[i] = i + 16;
4998 isize = INTVAL (ops[3]);
4999 if (isize == 1)
5000 shift = 3;
5001 else if (isize == 2)
5002 shift = 2;
5003 else
5004 shift = 0;
5005 offset = (INTVAL (ops[2]) +
5006 (GET_CODE (ops[1]) ==
5007 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5008 for (i = 0; i < isize; i++)
5009 dst[offset + i] = i + shift;
5010 return array_to_constant (TImode, dst);
5013 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5014 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5015 than 16 bytes, the value is repeated across the rest of the array. */
5016 void
5017 constant_to_array (machine_mode mode, rtx x, unsigned char arr[16])
5019 HOST_WIDE_INT val;
5020 int i, j, first;
5022 memset (arr, 0, 16);
5023 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5024 if (GET_CODE (x) == CONST_INT
5025 || (GET_CODE (x) == CONST_DOUBLE
5026 && (mode == SFmode || mode == DFmode)))
5028 gcc_assert (mode != VOIDmode && mode != BLKmode);
5030 if (GET_CODE (x) == CONST_DOUBLE)
5031 val = const_double_to_hwint (x);
5032 else
5033 val = INTVAL (x);
5034 first = GET_MODE_SIZE (mode) - 1;
5035 for (i = first; i >= 0; i--)
5037 arr[i] = val & 0xff;
5038 val >>= 8;
5040 /* Splat the constant across the whole array. */
5041 for (j = 0, i = first + 1; i < 16; i++)
5043 arr[i] = arr[j];
5044 j = (j == first) ? 0 : j + 1;
5047 else if (GET_CODE (x) == CONST_DOUBLE)
5049 val = CONST_DOUBLE_LOW (x);
5050 for (i = 15; i >= 8; i--)
5052 arr[i] = val & 0xff;
5053 val >>= 8;
5055 val = CONST_DOUBLE_HIGH (x);
5056 for (i = 7; i >= 0; i--)
5058 arr[i] = val & 0xff;
5059 val >>= 8;
5062 else if (GET_CODE (x) == CONST_VECTOR)
5064 int units;
5065 rtx elt;
5066 mode = GET_MODE_INNER (mode);
5067 units = CONST_VECTOR_NUNITS (x);
5068 for (i = 0; i < units; i++)
5070 elt = CONST_VECTOR_ELT (x, i);
5071 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5073 if (GET_CODE (elt) == CONST_DOUBLE)
5074 val = const_double_to_hwint (elt);
5075 else
5076 val = INTVAL (elt);
5077 first = GET_MODE_SIZE (mode) - 1;
5078 if (first + i * GET_MODE_SIZE (mode) > 16)
5079 abort ();
5080 for (j = first; j >= 0; j--)
5082 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5083 val >>= 8;
5088 else
5089 gcc_unreachable();
5092 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5093 smaller than 16 bytes, use the bytes that would represent that value
5094 in a register, e.g., for QImode return the value of arr[3]. */
5096 array_to_constant (machine_mode mode, const unsigned char arr[16])
5098 machine_mode inner_mode;
5099 rtvec v;
5100 int units, size, i, j, k;
5101 HOST_WIDE_INT val;
5103 if (GET_MODE_CLASS (mode) == MODE_INT
5104 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5106 j = GET_MODE_SIZE (mode);
5107 i = j < 4 ? 4 - j : 0;
5108 for (val = 0; i < j; i++)
5109 val = (val << 8) | arr[i];
5110 val = trunc_int_for_mode (val, mode);
5111 return GEN_INT (val);
5114 if (mode == TImode)
5116 HOST_WIDE_INT high;
5117 for (i = high = 0; i < 8; i++)
5118 high = (high << 8) | arr[i];
5119 for (i = 8, val = 0; i < 16; i++)
5120 val = (val << 8) | arr[i];
5121 return immed_double_const (val, high, TImode);
5123 if (mode == SFmode)
5125 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5126 val = trunc_int_for_mode (val, SImode);
5127 return hwint_to_const_double (SFmode, val);
5129 if (mode == DFmode)
5131 for (i = 0, val = 0; i < 8; i++)
5132 val = (val << 8) | arr[i];
5133 return hwint_to_const_double (DFmode, val);
5136 if (!VECTOR_MODE_P (mode))
5137 abort ();
5139 units = GET_MODE_NUNITS (mode);
5140 size = GET_MODE_UNIT_SIZE (mode);
5141 inner_mode = GET_MODE_INNER (mode);
5142 v = rtvec_alloc (units);
5144 for (k = i = 0; i < units; ++i)
5146 val = 0;
5147 for (j = 0; j < size; j++, k++)
5148 val = (val << 8) | arr[k];
5150 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5151 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5152 else
5153 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5155 if (k > 16)
5156 abort ();
5158 return gen_rtx_CONST_VECTOR (mode, v);
5161 static void
5162 reloc_diagnostic (rtx x)
5164 tree decl = 0;
5165 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5166 return;
5168 if (GET_CODE (x) == SYMBOL_REF)
5169 decl = SYMBOL_REF_DECL (x);
5170 else if (GET_CODE (x) == CONST
5171 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5172 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5174 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5175 if (decl && !DECL_P (decl))
5176 decl = 0;
5178 /* The decl could be a string constant. */
5179 if (decl && DECL_P (decl))
5181 location_t loc;
5182 /* We use last_assemble_variable_decl to get line information. It's
5183 not always going to be right and might not even be close, but will
5184 be right for the more common cases. */
5185 if (!last_assemble_variable_decl || in_section == ctors_section)
5186 loc = DECL_SOURCE_LOCATION (decl);
5187 else
5188 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5190 if (TARGET_WARN_RELOC)
5191 warning_at (loc, 0,
5192 "creating run-time relocation for %qD", decl);
5193 else
5194 error_at (loc,
5195 "creating run-time relocation for %qD", decl);
5197 else
5199 if (TARGET_WARN_RELOC)
5200 warning_at (input_location, 0, "creating run-time relocation");
5201 else
5202 error_at (input_location, "creating run-time relocation");
5206 /* Hook into assemble_integer so we can generate an error for run-time
5207 relocations. The SPU ABI disallows them. */
5208 static bool
5209 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5211 /* By default run-time relocations aren't supported, but we allow them
5212 in case users support it in their own run-time loader. And we provide
5213 a warning for those users that don't. */
5214 if ((GET_CODE (x) == SYMBOL_REF)
5215 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5216 reloc_diagnostic (x);
5218 return default_assemble_integer (x, size, aligned_p);
5221 static void
5222 spu_asm_globalize_label (FILE * file, const char *name)
5224 fputs ("\t.global\t", file);
5225 assemble_name (file, name);
5226 fputs ("\n", file);
5229 static bool
5230 spu_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
5231 int opno ATTRIBUTE_UNUSED, int *total,
5232 bool speed ATTRIBUTE_UNUSED)
5234 int code = GET_CODE (x);
5235 int cost = COSTS_N_INSNS (2);
5237 /* Folding to a CONST_VECTOR will use extra space but there might
5238 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5239 only if it allows us to fold away multiple insns. Changing the cost
5240 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5241 because this cost will only be compared against a single insn.
5242 if (code == CONST_VECTOR)
5243 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5246 /* Use defaults for float operations. Not accurate but good enough. */
5247 if (mode == DFmode)
5249 *total = COSTS_N_INSNS (13);
5250 return true;
5252 if (mode == SFmode)
5254 *total = COSTS_N_INSNS (6);
5255 return true;
5257 switch (code)
5259 case CONST_INT:
5260 if (satisfies_constraint_K (x))
5261 *total = 0;
5262 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5263 *total = COSTS_N_INSNS (1);
5264 else
5265 *total = COSTS_N_INSNS (3);
5266 return true;
5268 case CONST:
5269 *total = COSTS_N_INSNS (3);
5270 return true;
5272 case LABEL_REF:
5273 case SYMBOL_REF:
5274 *total = COSTS_N_INSNS (0);
5275 return true;
5277 case CONST_DOUBLE:
5278 *total = COSTS_N_INSNS (5);
5279 return true;
5281 case FLOAT_EXTEND:
5282 case FLOAT_TRUNCATE:
5283 case FLOAT:
5284 case UNSIGNED_FLOAT:
5285 case FIX:
5286 case UNSIGNED_FIX:
5287 *total = COSTS_N_INSNS (7);
5288 return true;
5290 case PLUS:
5291 if (mode == TImode)
5293 *total = COSTS_N_INSNS (9);
5294 return true;
5296 break;
5298 case MULT:
5299 cost =
5300 GET_CODE (XEXP (x, 0)) ==
5301 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5302 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5304 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5306 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5307 cost = COSTS_N_INSNS (14);
5308 if ((val & 0xffff) == 0)
5309 cost = COSTS_N_INSNS (9);
5310 else if (val > 0 && val < 0x10000)
5311 cost = COSTS_N_INSNS (11);
5314 *total = cost;
5315 return true;
5316 case DIV:
5317 case UDIV:
5318 case MOD:
5319 case UMOD:
5320 *total = COSTS_N_INSNS (20);
5321 return true;
5322 case ROTATE:
5323 case ROTATERT:
5324 case ASHIFT:
5325 case ASHIFTRT:
5326 case LSHIFTRT:
5327 *total = COSTS_N_INSNS (4);
5328 return true;
5329 case UNSPEC:
5330 if (XINT (x, 1) == UNSPEC_CONVERT)
5331 *total = COSTS_N_INSNS (0);
5332 else
5333 *total = COSTS_N_INSNS (4);
5334 return true;
5336 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5337 if (GET_MODE_CLASS (mode) == MODE_INT
5338 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5339 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5340 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5341 *total = cost;
5342 return true;
5345 static scalar_int_mode
5346 spu_unwind_word_mode (void)
5348 return SImode;
5351 /* Decide whether we can make a sibling call to a function. DECL is the
5352 declaration of the function being targeted by the call and EXP is the
5353 CALL_EXPR representing the call. */
5354 static bool
5355 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5357 return decl && !TARGET_LARGE_MEM;
5360 /* We need to correctly update the back chain pointer and the Available
5361 Stack Size (which is in the second slot of the sp register.) */
5362 void
5363 spu_allocate_stack (rtx op0, rtx op1)
5365 HOST_WIDE_INT v;
5366 rtx chain = gen_reg_rtx (V4SImode);
5367 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5368 rtx sp = gen_reg_rtx (V4SImode);
5369 rtx splatted = gen_reg_rtx (V4SImode);
5370 rtx pat = gen_reg_rtx (TImode);
5372 /* copy the back chain so we can save it back again. */
5373 emit_move_insn (chain, stack_bot);
5375 op1 = force_reg (SImode, op1);
5377 v = 0x1020300010203ll;
5378 emit_move_insn (pat, immed_double_const (v, v, TImode));
5379 emit_insn (gen_shufb (splatted, op1, op1, pat));
5381 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5382 emit_insn (gen_subv4si3 (sp, sp, splatted));
5384 if (flag_stack_check || flag_stack_clash_protection)
5386 rtx avail = gen_reg_rtx(SImode);
5387 rtx result = gen_reg_rtx(SImode);
5388 emit_insn (gen_vec_extractv4sisi (avail, sp, GEN_INT (1)));
5389 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5390 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5393 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5395 emit_move_insn (stack_bot, chain);
5397 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5400 void
5401 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5403 static unsigned char arr[16] =
5404 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5405 rtx temp = gen_reg_rtx (SImode);
5406 rtx temp2 = gen_reg_rtx (SImode);
5407 rtx temp3 = gen_reg_rtx (V4SImode);
5408 rtx temp4 = gen_reg_rtx (V4SImode);
5409 rtx pat = gen_reg_rtx (TImode);
5410 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5412 /* Restore the backchain from the first word, sp from the second. */
5413 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5414 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5416 emit_move_insn (pat, array_to_constant (TImode, arr));
5418 /* Compute Available Stack Size for sp */
5419 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5420 emit_insn (gen_shufb (temp3, temp, temp, pat));
5422 /* Compute Available Stack Size for back chain */
5423 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5424 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5425 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5427 emit_insn (gen_addv4si3 (sp, sp, temp3));
5428 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5431 static void
5432 spu_init_libfuncs (void)
5434 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5435 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5436 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5437 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5438 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5439 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5440 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5441 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5442 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5443 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5444 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5445 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5447 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5448 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5450 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5451 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5452 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5453 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5454 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5455 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5456 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5457 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5458 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5459 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5460 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5461 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5463 set_optab_libfunc (smul_optab, TImode, "__multi3");
5464 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5465 set_optab_libfunc (smod_optab, TImode, "__modti3");
5466 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5467 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5468 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5471 /* Make a subreg, stripping any existing subreg. We could possibly just
5472 call simplify_subreg, but in this case we know what we want. */
5474 spu_gen_subreg (machine_mode mode, rtx x)
5476 if (GET_CODE (x) == SUBREG)
5477 x = SUBREG_REG (x);
5478 if (GET_MODE (x) == mode)
5479 return x;
5480 return gen_rtx_SUBREG (mode, x, 0);
5483 static bool
5484 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5486 return (TYPE_MODE (type) == BLKmode
5487 && ((type) == 0
5488 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5489 || int_size_in_bytes (type) >
5490 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5493 /* Create the built-in types and functions */
5495 enum spu_function_code
5497 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5498 #include "spu-builtins.def"
5499 #undef DEF_BUILTIN
5500 NUM_SPU_BUILTINS
5503 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5505 struct spu_builtin_description spu_builtins[] = {
5506 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5507 {fcode, icode, name, type, params},
5508 #include "spu-builtins.def"
5509 #undef DEF_BUILTIN
5512 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5514 /* Returns the spu builtin decl for CODE. */
5516 static tree
5517 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5519 if (code >= NUM_SPU_BUILTINS)
5520 return error_mark_node;
5522 return spu_builtin_decls[code];
5526 static void
5527 spu_init_builtins (void)
5529 struct spu_builtin_description *d;
5530 unsigned int i;
5532 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5533 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5534 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5535 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5536 V4SF_type_node = build_vector_type (float_type_node, 4);
5537 V2DF_type_node = build_vector_type (double_type_node, 2);
5539 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5540 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5541 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5542 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5544 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5546 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5547 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5548 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5549 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5550 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5551 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5552 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5553 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5554 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5555 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5556 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5557 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5559 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5560 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5561 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5562 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5563 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5564 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5565 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5566 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5568 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5569 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5571 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5573 spu_builtin_types[SPU_BTI_PTR] =
5574 build_pointer_type (build_qualified_type
5575 (void_type_node,
5576 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5578 /* For each builtin we build a new prototype. The tree code will make
5579 sure nodes are shared. */
5580 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5582 tree p;
5583 char name[64]; /* build_function will make a copy. */
5584 int parm;
5586 if (d->name == 0)
5587 continue;
5589 /* Find last parm. */
5590 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5593 p = void_list_node;
5594 while (parm > 1)
5595 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5597 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5599 sprintf (name, "__builtin_%s", d->name);
5600 spu_builtin_decls[i] =
5601 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5602 if (d->fcode == SPU_MASK_FOR_LOAD)
5603 TREE_READONLY (spu_builtin_decls[i]) = 1;
5605 /* These builtins don't throw. */
5606 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5610 void
5611 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5613 static unsigned char arr[16] =
5614 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5616 rtx temp = gen_reg_rtx (Pmode);
5617 rtx temp2 = gen_reg_rtx (V4SImode);
5618 rtx temp3 = gen_reg_rtx (V4SImode);
5619 rtx pat = gen_reg_rtx (TImode);
5620 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5622 emit_move_insn (pat, array_to_constant (TImode, arr));
5624 /* Restore the sp. */
5625 emit_move_insn (temp, op1);
5626 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5628 /* Compute available stack size for sp. */
5629 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5630 emit_insn (gen_shufb (temp3, temp, temp, pat));
5632 emit_insn (gen_addv4si3 (sp, sp, temp3));
5633 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5637 spu_safe_dma (HOST_WIDE_INT channel)
5639 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5642 void
5643 spu_builtin_splats (rtx ops[])
5645 machine_mode mode = GET_MODE (ops[0]);
5646 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5648 unsigned char arr[16];
5649 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5650 emit_move_insn (ops[0], array_to_constant (mode, arr));
5652 else
5654 rtx reg = gen_reg_rtx (TImode);
5655 rtx shuf;
5656 if (GET_CODE (ops[1]) != REG
5657 && GET_CODE (ops[1]) != SUBREG)
5658 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5659 switch (mode)
5661 case E_V2DImode:
5662 case E_V2DFmode:
5663 shuf =
5664 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5665 TImode);
5666 break;
5667 case E_V4SImode:
5668 case E_V4SFmode:
5669 shuf =
5670 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5671 TImode);
5672 break;
5673 case E_V8HImode:
5674 shuf =
5675 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5676 TImode);
5677 break;
5678 case E_V16QImode:
5679 shuf =
5680 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5681 TImode);
5682 break;
5683 default:
5684 abort ();
5686 emit_move_insn (reg, shuf);
5687 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5691 void
5692 spu_builtin_extract (rtx ops[])
5694 machine_mode mode;
5695 rtx rot, from, tmp;
5697 mode = GET_MODE (ops[1]);
5699 if (GET_CODE (ops[2]) == CONST_INT)
5701 switch (mode)
5703 case E_V16QImode:
5704 emit_insn (gen_vec_extractv16qiqi (ops[0], ops[1], ops[2]));
5705 break;
5706 case E_V8HImode:
5707 emit_insn (gen_vec_extractv8hihi (ops[0], ops[1], ops[2]));
5708 break;
5709 case E_V4SFmode:
5710 emit_insn (gen_vec_extractv4sfsf (ops[0], ops[1], ops[2]));
5711 break;
5712 case E_V4SImode:
5713 emit_insn (gen_vec_extractv4sisi (ops[0], ops[1], ops[2]));
5714 break;
5715 case E_V2DImode:
5716 emit_insn (gen_vec_extractv2didi (ops[0], ops[1], ops[2]));
5717 break;
5718 case E_V2DFmode:
5719 emit_insn (gen_vec_extractv2dfdf (ops[0], ops[1], ops[2]));
5720 break;
5721 default:
5722 abort ();
5724 return;
5727 from = spu_gen_subreg (TImode, ops[1]);
5728 rot = gen_reg_rtx (TImode);
5729 tmp = gen_reg_rtx (SImode);
5731 switch (mode)
5733 case E_V16QImode:
5734 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5735 break;
5736 case E_V8HImode:
5737 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5738 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5739 break;
5740 case E_V4SFmode:
5741 case E_V4SImode:
5742 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5743 break;
5744 case E_V2DImode:
5745 case E_V2DFmode:
5746 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5747 break;
5748 default:
5749 abort ();
5751 emit_insn (gen_rotqby_ti (rot, from, tmp));
5753 emit_insn (gen_spu_convert (ops[0], rot));
5756 void
5757 spu_builtin_insert (rtx ops[])
5759 machine_mode mode = GET_MODE (ops[0]);
5760 machine_mode imode = GET_MODE_INNER (mode);
5761 rtx mask = gen_reg_rtx (TImode);
5762 rtx offset;
5764 if (GET_CODE (ops[3]) == CONST_INT)
5765 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5766 else
5768 offset = gen_reg_rtx (SImode);
5769 emit_insn (gen_mulsi3
5770 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5772 emit_insn (gen_cpat
5773 (mask, stack_pointer_rtx, offset,
5774 GEN_INT (GET_MODE_SIZE (imode))));
5775 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5778 void
5779 spu_builtin_promote (rtx ops[])
5781 machine_mode mode, imode;
5782 rtx rot, from, offset;
5783 HOST_WIDE_INT pos;
5785 mode = GET_MODE (ops[0]);
5786 imode = GET_MODE_INNER (mode);
5788 from = gen_reg_rtx (TImode);
5789 rot = spu_gen_subreg (TImode, ops[0]);
5791 emit_insn (gen_spu_convert (from, ops[1]));
5793 if (GET_CODE (ops[2]) == CONST_INT)
5795 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5796 if (GET_MODE_SIZE (imode) < 4)
5797 pos += 4 - GET_MODE_SIZE (imode);
5798 offset = GEN_INT (pos & 15);
5800 else
5802 offset = gen_reg_rtx (SImode);
5803 switch (mode)
5805 case E_V16QImode:
5806 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5807 break;
5808 case E_V8HImode:
5809 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5810 emit_insn (gen_addsi3 (offset, offset, offset));
5811 break;
5812 case E_V4SFmode:
5813 case E_V4SImode:
5814 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5815 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5816 break;
5817 case E_V2DImode:
5818 case E_V2DFmode:
5819 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5820 break;
5821 default:
5822 abort ();
5825 emit_insn (gen_rotqby_ti (rot, from, offset));
5828 static void
5829 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5831 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5832 rtx shuf = gen_reg_rtx (V4SImode);
5833 rtx insn = gen_reg_rtx (V4SImode);
5834 rtx shufc;
5835 rtx insnc;
5836 rtx mem;
5838 fnaddr = force_reg (SImode, fnaddr);
5839 cxt = force_reg (SImode, cxt);
5841 if (TARGET_LARGE_MEM)
5843 rtx rotl = gen_reg_rtx (V4SImode);
5844 rtx mask = gen_reg_rtx (V4SImode);
5845 rtx bi = gen_reg_rtx (SImode);
5846 static unsigned char const shufa[16] = {
5847 2, 3, 0, 1, 18, 19, 16, 17,
5848 0, 1, 2, 3, 16, 17, 18, 19
5850 static unsigned char const insna[16] = {
5851 0x41, 0, 0, 79,
5852 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5853 0x60, 0x80, 0, 79,
5854 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5857 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5858 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5860 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5861 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5862 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5863 emit_insn (gen_selb (insn, insnc, rotl, mask));
5865 mem = adjust_address (m_tramp, V4SImode, 0);
5866 emit_move_insn (mem, insn);
5868 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5869 mem = adjust_address (m_tramp, Pmode, 16);
5870 emit_move_insn (mem, bi);
5872 else
5874 rtx scxt = gen_reg_rtx (SImode);
5875 rtx sfnaddr = gen_reg_rtx (SImode);
5876 static unsigned char const insna[16] = {
5877 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5878 0x30, 0, 0, 0,
5879 0, 0, 0, 0,
5880 0, 0, 0, 0
5883 shufc = gen_reg_rtx (TImode);
5884 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5886 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5887 fits 18 bits and the last 4 are zeros. This will be true if
5888 the stack pointer is initialized to 0x3fff0 at program start,
5889 otherwise the ila instruction will be garbage. */
5891 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5892 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5893 emit_insn (gen_cpat
5894 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5895 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5896 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5898 mem = adjust_address (m_tramp, V4SImode, 0);
5899 emit_move_insn (mem, insn);
5901 emit_insn (gen_sync ());
5904 static bool
5905 spu_warn_func_return (tree decl)
5907 /* Naked functions are implemented entirely in assembly, including the
5908 return sequence, so suppress warnings about this. */
5909 return !spu_naked_function_p (decl);
5912 void
5913 spu_expand_sign_extend (rtx ops[])
5915 unsigned char arr[16];
5916 rtx pat = gen_reg_rtx (TImode);
5917 rtx sign, c;
5918 int i, last;
5919 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5920 if (GET_MODE (ops[1]) == QImode)
5922 sign = gen_reg_rtx (HImode);
5923 emit_insn (gen_extendqihi2 (sign, ops[1]));
5924 for (i = 0; i < 16; i++)
5925 arr[i] = 0x12;
5926 arr[last] = 0x13;
5928 else
5930 for (i = 0; i < 16; i++)
5931 arr[i] = 0x10;
5932 switch (GET_MODE (ops[1]))
5934 case E_HImode:
5935 sign = gen_reg_rtx (SImode);
5936 emit_insn (gen_extendhisi2 (sign, ops[1]));
5937 arr[last] = 0x03;
5938 arr[last - 1] = 0x02;
5939 break;
5940 case E_SImode:
5941 sign = gen_reg_rtx (SImode);
5942 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5943 for (i = 0; i < 4; i++)
5944 arr[last - i] = 3 - i;
5945 break;
5946 case E_DImode:
5947 sign = gen_reg_rtx (SImode);
5948 c = gen_reg_rtx (SImode);
5949 emit_insn (gen_spu_convert (c, ops[1]));
5950 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5951 for (i = 0; i < 8; i++)
5952 arr[last - i] = 7 - i;
5953 break;
5954 default:
5955 abort ();
5958 emit_move_insn (pat, array_to_constant (TImode, arr));
5959 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5962 /* expand vector initialization. If there are any constant parts,
5963 load constant parts first. Then load any non-constant parts. */
5964 void
5965 spu_expand_vector_init (rtx target, rtx vals)
5967 machine_mode mode = GET_MODE (target);
5968 int n_elts = GET_MODE_NUNITS (mode);
5969 int n_var = 0;
5970 bool all_same = true;
5971 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5972 int i;
5974 first = XVECEXP (vals, 0, 0);
5975 for (i = 0; i < n_elts; ++i)
5977 x = XVECEXP (vals, 0, i);
5978 if (!(CONST_INT_P (x)
5979 || GET_CODE (x) == CONST_DOUBLE
5980 || GET_CODE (x) == CONST_FIXED))
5981 ++n_var;
5982 else
5984 if (first_constant == NULL_RTX)
5985 first_constant = x;
5987 if (i > 0 && !rtx_equal_p (x, first))
5988 all_same = false;
5991 /* if all elements are the same, use splats to repeat elements */
5992 if (all_same)
5994 if (!CONSTANT_P (first)
5995 && !register_operand (first, GET_MODE (x)))
5996 first = force_reg (GET_MODE (first), first);
5997 emit_insn (gen_spu_splats (target, first));
5998 return;
6001 /* load constant parts */
6002 if (n_var != n_elts)
6004 if (n_var == 0)
6006 emit_move_insn (target,
6007 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6009 else
6011 rtx constant_parts_rtx = copy_rtx (vals);
6013 gcc_assert (first_constant != NULL_RTX);
6014 /* fill empty slots with the first constant, this increases
6015 our chance of using splats in the recursive call below. */
6016 for (i = 0; i < n_elts; ++i)
6018 x = XVECEXP (constant_parts_rtx, 0, i);
6019 if (!(CONST_INT_P (x)
6020 || GET_CODE (x) == CONST_DOUBLE
6021 || GET_CODE (x) == CONST_FIXED))
6022 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6025 spu_expand_vector_init (target, constant_parts_rtx);
6029 /* load variable parts */
6030 if (n_var != 0)
6032 rtx insert_operands[4];
6034 insert_operands[0] = target;
6035 insert_operands[2] = target;
6036 for (i = 0; i < n_elts; ++i)
6038 x = XVECEXP (vals, 0, i);
6039 if (!(CONST_INT_P (x)
6040 || GET_CODE (x) == CONST_DOUBLE
6041 || GET_CODE (x) == CONST_FIXED))
6043 if (!register_operand (x, GET_MODE (x)))
6044 x = force_reg (GET_MODE (x), x);
6045 insert_operands[1] = x;
6046 insert_operands[3] = GEN_INT (i);
6047 spu_builtin_insert (insert_operands);
6053 /* Return insn index for the vector compare instruction for given CODE,
6054 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6056 static int
6057 get_vec_cmp_insn (enum rtx_code code,
6058 machine_mode dest_mode,
6059 machine_mode op_mode)
6062 switch (code)
6064 case EQ:
6065 if (dest_mode == V16QImode && op_mode == V16QImode)
6066 return CODE_FOR_ceq_v16qi;
6067 if (dest_mode == V8HImode && op_mode == V8HImode)
6068 return CODE_FOR_ceq_v8hi;
6069 if (dest_mode == V4SImode && op_mode == V4SImode)
6070 return CODE_FOR_ceq_v4si;
6071 if (dest_mode == V4SImode && op_mode == V4SFmode)
6072 return CODE_FOR_ceq_v4sf;
6073 if (dest_mode == V2DImode && op_mode == V2DFmode)
6074 return CODE_FOR_ceq_v2df;
6075 break;
6076 case GT:
6077 if (dest_mode == V16QImode && op_mode == V16QImode)
6078 return CODE_FOR_cgt_v16qi;
6079 if (dest_mode == V8HImode && op_mode == V8HImode)
6080 return CODE_FOR_cgt_v8hi;
6081 if (dest_mode == V4SImode && op_mode == V4SImode)
6082 return CODE_FOR_cgt_v4si;
6083 if (dest_mode == V4SImode && op_mode == V4SFmode)
6084 return CODE_FOR_cgt_v4sf;
6085 if (dest_mode == V2DImode && op_mode == V2DFmode)
6086 return CODE_FOR_cgt_v2df;
6087 break;
6088 case GTU:
6089 if (dest_mode == V16QImode && op_mode == V16QImode)
6090 return CODE_FOR_clgt_v16qi;
6091 if (dest_mode == V8HImode && op_mode == V8HImode)
6092 return CODE_FOR_clgt_v8hi;
6093 if (dest_mode == V4SImode && op_mode == V4SImode)
6094 return CODE_FOR_clgt_v4si;
6095 break;
6096 default:
6097 break;
6099 return -1;
6102 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6103 DMODE is expected destination mode. This is a recursive function. */
6105 static rtx
6106 spu_emit_vector_compare (enum rtx_code rcode,
6107 rtx op0, rtx op1,
6108 machine_mode dmode)
6110 int vec_cmp_insn;
6111 rtx mask;
6112 machine_mode dest_mode;
6113 machine_mode op_mode = GET_MODE (op1);
6115 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6117 /* Floating point vector compare instructions uses destination V4SImode.
6118 Double floating point vector compare instructions uses destination V2DImode.
6119 Move destination to appropriate mode later. */
6120 if (dmode == V4SFmode)
6121 dest_mode = V4SImode;
6122 else if (dmode == V2DFmode)
6123 dest_mode = V2DImode;
6124 else
6125 dest_mode = dmode;
6127 mask = gen_reg_rtx (dest_mode);
6128 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6130 if (vec_cmp_insn == -1)
6132 bool swap_operands = false;
6133 bool try_again = false;
6134 switch (rcode)
6136 case LT:
6137 rcode = GT;
6138 swap_operands = true;
6139 try_again = true;
6140 break;
6141 case LTU:
6142 rcode = GTU;
6143 swap_operands = true;
6144 try_again = true;
6145 break;
6146 case NE:
6147 case UNEQ:
6148 case UNLE:
6149 case UNLT:
6150 case UNGE:
6151 case UNGT:
6152 case UNORDERED:
6153 /* Treat A != B as ~(A==B). */
6155 enum rtx_code rev_code;
6156 enum insn_code nor_code;
6157 rtx rev_mask;
6159 rev_code = reverse_condition_maybe_unordered (rcode);
6160 rev_mask = spu_emit_vector_compare (rev_code, op0, op1, dest_mode);
6162 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6163 gcc_assert (nor_code != CODE_FOR_nothing);
6164 emit_insn (GEN_FCN (nor_code) (mask, rev_mask));
6165 if (dmode != dest_mode)
6167 rtx temp = gen_reg_rtx (dest_mode);
6168 convert_move (temp, mask, 0);
6169 return temp;
6171 return mask;
6173 break;
6174 case GE:
6175 case GEU:
6176 case LE:
6177 case LEU:
6178 /* Try GT/GTU/LT/LTU OR EQ */
6180 rtx c_rtx, eq_rtx;
6181 enum insn_code ior_code;
6182 enum rtx_code new_code;
6184 switch (rcode)
6186 case GE: new_code = GT; break;
6187 case GEU: new_code = GTU; break;
6188 case LE: new_code = LT; break;
6189 case LEU: new_code = LTU; break;
6190 default:
6191 gcc_unreachable ();
6194 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6195 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6197 ior_code = optab_handler (ior_optab, dest_mode);
6198 gcc_assert (ior_code != CODE_FOR_nothing);
6199 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6200 if (dmode != dest_mode)
6202 rtx temp = gen_reg_rtx (dest_mode);
6203 convert_move (temp, mask, 0);
6204 return temp;
6206 return mask;
6208 break;
6209 case LTGT:
6210 /* Try LT OR GT */
6212 rtx lt_rtx, gt_rtx;
6213 enum insn_code ior_code;
6215 lt_rtx = spu_emit_vector_compare (LT, op0, op1, dest_mode);
6216 gt_rtx = spu_emit_vector_compare (GT, op0, op1, dest_mode);
6218 ior_code = optab_handler (ior_optab, dest_mode);
6219 gcc_assert (ior_code != CODE_FOR_nothing);
6220 emit_insn (GEN_FCN (ior_code) (mask, lt_rtx, gt_rtx));
6221 if (dmode != dest_mode)
6223 rtx temp = gen_reg_rtx (dest_mode);
6224 convert_move (temp, mask, 0);
6225 return temp;
6227 return mask;
6229 break;
6230 case ORDERED:
6231 /* Implement as (A==A) & (B==B) */
6233 rtx a_rtx, b_rtx;
6234 enum insn_code and_code;
6236 a_rtx = spu_emit_vector_compare (EQ, op0, op0, dest_mode);
6237 b_rtx = spu_emit_vector_compare (EQ, op1, op1, dest_mode);
6239 and_code = optab_handler (and_optab, dest_mode);
6240 gcc_assert (and_code != CODE_FOR_nothing);
6241 emit_insn (GEN_FCN (and_code) (mask, a_rtx, b_rtx));
6242 if (dmode != dest_mode)
6244 rtx temp = gen_reg_rtx (dest_mode);
6245 convert_move (temp, mask, 0);
6246 return temp;
6248 return mask;
6250 break;
6251 default:
6252 gcc_unreachable ();
6255 /* You only get two chances. */
6256 if (try_again)
6257 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6259 gcc_assert (vec_cmp_insn != -1);
6261 if (swap_operands)
6263 rtx tmp;
6264 tmp = op0;
6265 op0 = op1;
6266 op1 = tmp;
6270 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6271 if (dmode != dest_mode)
6273 rtx temp = gen_reg_rtx (dest_mode);
6274 convert_move (temp, mask, 0);
6275 return temp;
6277 return mask;
6281 /* Emit vector conditional expression.
6282 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6283 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6286 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6287 rtx cond, rtx cc_op0, rtx cc_op1)
6289 machine_mode dest_mode = GET_MODE (dest);
6290 enum rtx_code rcode = GET_CODE (cond);
6291 rtx mask;
6293 /* Get the vector mask for the given relational operations. */
6294 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6296 emit_insn(gen_selb (dest, op2, op1, mask));
6298 return 1;
6301 static rtx
6302 spu_force_reg (machine_mode mode, rtx op)
6304 rtx x, r;
6305 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6307 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6308 || GET_MODE (op) == BLKmode)
6309 return force_reg (mode, convert_to_mode (mode, op, 0));
6310 abort ();
6313 r = force_reg (GET_MODE (op), op);
6314 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6316 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6317 if (x)
6318 return x;
6321 x = gen_reg_rtx (mode);
6322 emit_insn (gen_spu_convert (x, r));
6323 return x;
6326 static void
6327 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6329 HOST_WIDE_INT v = 0;
6330 int lsbits;
6331 /* Check the range of immediate operands. */
6332 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6334 int range = p - SPU_BTI_7;
6336 if (!CONSTANT_P (op))
6337 error ("%s expects an integer literal in the range [%d, %d]",
6338 d->name,
6339 spu_builtin_range[range].low, spu_builtin_range[range].high);
6341 if (GET_CODE (op) == CONST
6342 && (GET_CODE (XEXP (op, 0)) == PLUS
6343 || GET_CODE (XEXP (op, 0)) == MINUS))
6345 v = INTVAL (XEXP (XEXP (op, 0), 1));
6346 op = XEXP (XEXP (op, 0), 0);
6348 else if (GET_CODE (op) == CONST_INT)
6349 v = INTVAL (op);
6350 else if (GET_CODE (op) == CONST_VECTOR
6351 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6352 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6354 /* The default for v is 0 which is valid in every range. */
6355 if (v < spu_builtin_range[range].low
6356 || v > spu_builtin_range[range].high)
6357 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6358 d->name,
6359 spu_builtin_range[range].low, spu_builtin_range[range].high,
6362 switch (p)
6364 case SPU_BTI_S10_4:
6365 lsbits = 4;
6366 break;
6367 case SPU_BTI_U16_2:
6368 /* This is only used in lqa, and stqa. Even though the insns
6369 encode 16 bits of the address (all but the 2 least
6370 significant), only 14 bits are used because it is masked to
6371 be 16 byte aligned. */
6372 lsbits = 4;
6373 break;
6374 case SPU_BTI_S16_2:
6375 /* This is used for lqr and stqr. */
6376 lsbits = 2;
6377 break;
6378 default:
6379 lsbits = 0;
6382 if (GET_CODE (op) == LABEL_REF
6383 || (GET_CODE (op) == SYMBOL_REF
6384 && SYMBOL_REF_FUNCTION_P (op))
6385 || (v & ((1 << lsbits) - 1)) != 0)
6386 warning (0, "%d least significant bits of %s are ignored", lsbits,
6387 d->name);
6392 static int
6393 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6394 rtx target, rtx ops[])
6396 enum insn_code icode = (enum insn_code) d->icode;
6397 int i = 0, a;
6399 /* Expand the arguments into rtl. */
6401 if (d->parm[0] != SPU_BTI_VOID)
6402 ops[i++] = target;
6404 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6406 tree arg = CALL_EXPR_ARG (exp, a);
6407 if (arg == 0)
6408 abort ();
6409 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6412 gcc_assert (i == insn_data[icode].n_generator_args);
6413 return i;
6416 static rtx
6417 spu_expand_builtin_1 (struct spu_builtin_description *d,
6418 tree exp, rtx target)
6420 rtx pat;
6421 rtx ops[8];
6422 enum insn_code icode = (enum insn_code) d->icode;
6423 machine_mode mode, tmode;
6424 int i, p;
6425 int n_operands;
6426 tree return_type;
6428 /* Set up ops[] with values from arglist. */
6429 n_operands = expand_builtin_args (d, exp, target, ops);
6431 /* Handle the target operand which must be operand 0. */
6432 i = 0;
6433 if (d->parm[0] != SPU_BTI_VOID)
6436 /* We prefer the mode specified for the match_operand otherwise
6437 use the mode from the builtin function prototype. */
6438 tmode = insn_data[d->icode].operand[0].mode;
6439 if (tmode == VOIDmode)
6440 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6442 /* Try to use target because not using it can lead to extra copies
6443 and when we are using all of the registers extra copies leads
6444 to extra spills. */
6445 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6446 ops[0] = target;
6447 else
6448 target = ops[0] = gen_reg_rtx (tmode);
6450 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6451 abort ();
6453 i++;
6456 if (d->fcode == SPU_MASK_FOR_LOAD)
6458 machine_mode mode = insn_data[icode].operand[1].mode;
6459 tree arg;
6460 rtx addr, op, pat;
6462 /* get addr */
6463 arg = CALL_EXPR_ARG (exp, 0);
6464 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6465 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6466 addr = memory_address (mode, op);
6468 /* negate addr */
6469 op = gen_reg_rtx (GET_MODE (addr));
6470 emit_insn (gen_rtx_SET (op, gen_rtx_NEG (GET_MODE (addr), addr)));
6471 op = gen_rtx_MEM (mode, op);
6473 pat = GEN_FCN (icode) (target, op);
6474 if (!pat)
6475 return 0;
6476 emit_insn (pat);
6477 return target;
6480 /* Ignore align_hint, but still expand it's args in case they have
6481 side effects. */
6482 if (icode == CODE_FOR_spu_align_hint)
6483 return 0;
6485 /* Handle the rest of the operands. */
6486 for (p = 1; i < n_operands; i++, p++)
6488 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6489 mode = insn_data[d->icode].operand[i].mode;
6490 else
6491 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6493 /* mode can be VOIDmode here for labels */
6495 /* For specific intrinsics with an immediate operand, e.g.,
6496 si_ai(), we sometimes need to convert the scalar argument to a
6497 vector argument by splatting the scalar. */
6498 if (VECTOR_MODE_P (mode)
6499 && (GET_CODE (ops[i]) == CONST_INT
6500 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6501 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6503 if (GET_CODE (ops[i]) == CONST_INT)
6504 ops[i] = spu_const (mode, INTVAL (ops[i]));
6505 else
6507 rtx reg = gen_reg_rtx (mode);
6508 machine_mode imode = GET_MODE_INNER (mode);
6509 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6510 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6511 if (imode != GET_MODE (ops[i]))
6512 ops[i] = convert_to_mode (imode, ops[i],
6513 TYPE_UNSIGNED (spu_builtin_types
6514 [d->parm[i]]));
6515 emit_insn (gen_spu_splats (reg, ops[i]));
6516 ops[i] = reg;
6520 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6522 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6523 ops[i] = spu_force_reg (mode, ops[i]);
6526 switch (n_operands)
6528 case 0:
6529 pat = GEN_FCN (icode) (0);
6530 break;
6531 case 1:
6532 pat = GEN_FCN (icode) (ops[0]);
6533 break;
6534 case 2:
6535 pat = GEN_FCN (icode) (ops[0], ops[1]);
6536 break;
6537 case 3:
6538 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6539 break;
6540 case 4:
6541 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6542 break;
6543 case 5:
6544 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6545 break;
6546 case 6:
6547 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6548 break;
6549 default:
6550 abort ();
6553 if (!pat)
6554 abort ();
6556 if (d->type == B_CALL || d->type == B_BISLED)
6557 emit_call_insn (pat);
6558 else if (d->type == B_JUMP)
6560 emit_jump_insn (pat);
6561 emit_barrier ();
6563 else
6564 emit_insn (pat);
6566 return_type = spu_builtin_types[d->parm[0]];
6567 if (d->parm[0] != SPU_BTI_VOID
6568 && GET_MODE (target) != TYPE_MODE (return_type))
6570 /* target is the return value. It should always be the mode of
6571 the builtin function prototype. */
6572 target = spu_force_reg (TYPE_MODE (return_type), target);
6575 return target;
6579 spu_expand_builtin (tree exp,
6580 rtx target,
6581 rtx subtarget ATTRIBUTE_UNUSED,
6582 machine_mode mode ATTRIBUTE_UNUSED,
6583 int ignore ATTRIBUTE_UNUSED)
6585 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6586 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6587 struct spu_builtin_description *d;
6589 if (fcode < NUM_SPU_BUILTINS)
6591 d = &spu_builtins[fcode];
6593 return spu_expand_builtin_1 (d, exp, target);
6595 abort ();
6598 /* Implement targetm.vectorize.builtin_mask_for_load. */
6599 static tree
6600 spu_builtin_mask_for_load (void)
6602 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6605 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6606 static int
6607 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6608 tree vectype,
6609 int misalign ATTRIBUTE_UNUSED)
6611 unsigned elements;
6613 switch (type_of_cost)
6615 case scalar_stmt:
6616 case vector_stmt:
6617 case vector_load:
6618 case vector_store:
6619 case vec_to_scalar:
6620 case scalar_to_vec:
6621 case cond_branch_not_taken:
6622 case vec_perm:
6623 case vec_promote_demote:
6624 return 1;
6626 case scalar_store:
6627 return 10;
6629 case scalar_load:
6630 /* Load + rotate. */
6631 return 2;
6633 case unaligned_load:
6634 case vector_gather_load:
6635 case vector_scatter_store:
6636 return 2;
6638 case cond_branch_taken:
6639 return 6;
6641 case vec_construct:
6642 elements = TYPE_VECTOR_SUBPARTS (vectype);
6643 return elements / 2 + 1;
6645 default:
6646 gcc_unreachable ();
6650 /* Implement targetm.vectorize.init_cost. */
6652 static void *
6653 spu_init_cost (struct loop *loop_info ATTRIBUTE_UNUSED)
6655 unsigned *cost = XNEWVEC (unsigned, 3);
6656 cost[vect_prologue] = cost[vect_body] = cost[vect_epilogue] = 0;
6657 return cost;
6660 /* Implement targetm.vectorize.add_stmt_cost. */
6662 static unsigned
6663 spu_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
6664 struct _stmt_vec_info *stmt_info, int misalign,
6665 enum vect_cost_model_location where)
6667 unsigned *cost = (unsigned *) data;
6668 unsigned retval = 0;
6670 if (flag_vect_cost_model)
6672 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
6673 int stmt_cost = spu_builtin_vectorization_cost (kind, vectype, misalign);
6675 /* Statements in an inner loop relative to the loop being
6676 vectorized are weighted more heavily. The value here is
6677 arbitrary and could potentially be improved with analysis. */
6678 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
6679 count *= 50; /* FIXME. */
6681 retval = (unsigned) (count * stmt_cost);
6682 cost[where] += retval;
6685 return retval;
6688 /* Implement targetm.vectorize.finish_cost. */
6690 static void
6691 spu_finish_cost (void *data, unsigned *prologue_cost,
6692 unsigned *body_cost, unsigned *epilogue_cost)
6694 unsigned *cost = (unsigned *) data;
6695 *prologue_cost = cost[vect_prologue];
6696 *body_cost = cost[vect_body];
6697 *epilogue_cost = cost[vect_epilogue];
6700 /* Implement targetm.vectorize.destroy_cost_data. */
6702 static void
6703 spu_destroy_cost_data (void *data)
6705 free (data);
6708 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6709 after applying N number of iterations. This routine does not determine
6710 how may iterations are required to reach desired alignment. */
6712 static bool
6713 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6715 if (is_packed)
6716 return false;
6718 /* All other types are naturally aligned. */
6719 return true;
6722 /* Return the appropriate mode for a named address pointer. */
6723 static scalar_int_mode
6724 spu_addr_space_pointer_mode (addr_space_t addrspace)
6726 switch (addrspace)
6728 case ADDR_SPACE_GENERIC:
6729 return ptr_mode;
6730 case ADDR_SPACE_EA:
6731 return EAmode;
6732 default:
6733 gcc_unreachable ();
6737 /* Return the appropriate mode for a named address address. */
6738 static scalar_int_mode
6739 spu_addr_space_address_mode (addr_space_t addrspace)
6741 switch (addrspace)
6743 case ADDR_SPACE_GENERIC:
6744 return Pmode;
6745 case ADDR_SPACE_EA:
6746 return EAmode;
6747 default:
6748 gcc_unreachable ();
6752 /* Determine if one named address space is a subset of another. */
6754 static bool
6755 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6757 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6758 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6760 if (subset == superset)
6761 return true;
6763 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6764 being subsets but instead as disjoint address spaces. */
6765 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6766 return false;
6768 else
6769 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6772 /* Convert from one address space to another. */
6773 static rtx
6774 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6776 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6777 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6779 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6780 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6782 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6784 rtx result, ls;
6786 ls = gen_const_mem (DImode,
6787 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6788 set_mem_align (ls, 128);
6790 result = gen_reg_rtx (Pmode);
6791 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6792 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6793 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6794 ls, const0_rtx, Pmode, 1);
6796 emit_insn (gen_subsi3 (result, op, ls));
6798 return result;
6801 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6803 rtx result, ls;
6805 ls = gen_const_mem (DImode,
6806 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6807 set_mem_align (ls, 128);
6809 result = gen_reg_rtx (EAmode);
6810 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6811 op = force_reg (Pmode, op);
6812 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6813 ls, const0_rtx, EAmode, 1);
6814 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6816 if (EAmode == SImode)
6817 emit_insn (gen_addsi3 (result, op, ls));
6818 else
6819 emit_insn (gen_adddi3 (result, op, ls));
6821 return result;
6824 else
6825 gcc_unreachable ();
6829 /* Count the total number of instructions in each pipe and return the
6830 maximum, which is used as the Minimum Iteration Interval (MII)
6831 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6832 -2 are instructions that can go in pipe0 or pipe1. */
6833 static int
6834 spu_sms_res_mii (struct ddg *g)
6836 int i;
6837 unsigned t[4] = {0, 0, 0, 0};
6839 for (i = 0; i < g->num_nodes; i++)
6841 rtx_insn *insn = g->nodes[i].insn;
6842 int p = get_pipe (insn) + 2;
6844 gcc_assert (p >= 0);
6845 gcc_assert (p < 4);
6847 t[p]++;
6848 if (dump_file && INSN_P (insn))
6849 fprintf (dump_file, "i%d %s %d %d\n",
6850 INSN_UID (insn),
6851 insn_data[INSN_CODE(insn)].name,
6852 p, t[p]);
6854 if (dump_file)
6855 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6857 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6861 void
6862 spu_init_expanders (void)
6864 if (cfun)
6866 rtx r0, r1;
6867 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6868 frame_pointer_needed is true. We don't know that until we're
6869 expanding the prologue. */
6870 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6872 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6873 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6874 to be treated as aligned, so generate them here. */
6875 r0 = gen_reg_rtx (SImode);
6876 r1 = gen_reg_rtx (SImode);
6877 mark_reg_pointer (r0, 128);
6878 mark_reg_pointer (r1, 128);
6879 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6880 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6884 static scalar_int_mode
6885 spu_libgcc_cmp_return_mode (void)
6888 /* For SPU word mode is TI mode so it is better to use SImode
6889 for compare returns. */
6890 return SImode;
6893 static scalar_int_mode
6894 spu_libgcc_shift_count_mode (void)
6896 /* For SPU word mode is TI mode so it is better to use SImode
6897 for shift counts. */
6898 return SImode;
6901 /* Implement targetm.section_type_flags. */
6902 static unsigned int
6903 spu_section_type_flags (tree decl, const char *name, int reloc)
6905 /* .toe needs to have type @nobits. */
6906 if (strcmp (name, ".toe") == 0)
6907 return SECTION_BSS;
6908 /* Don't load _ea into the current address space. */
6909 if (strcmp (name, "._ea") == 0)
6910 return SECTION_WRITE | SECTION_DEBUG;
6911 return default_section_type_flags (decl, name, reloc);
6914 /* Implement targetm.select_section. */
6915 static section *
6916 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
6918 /* Variables and constants defined in the __ea address space
6919 go into a special section named "._ea". */
6920 if (TREE_TYPE (decl) != error_mark_node
6921 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
6923 /* We might get called with string constants, but get_named_section
6924 doesn't like them as they are not DECLs. Also, we need to set
6925 flags in that case. */
6926 if (!DECL_P (decl))
6927 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
6929 return get_named_section (decl, "._ea", reloc);
6932 return default_elf_select_section (decl, reloc, align);
6935 /* Implement targetm.unique_section. */
6936 static void
6937 spu_unique_section (tree decl, int reloc)
6939 /* We don't support unique section names in the __ea address
6940 space for now. */
6941 if (TREE_TYPE (decl) != error_mark_node
6942 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
6943 return;
6945 default_unique_section (decl, reloc);
6948 /* Generate a constant or register which contains 2^SCALE. We assume
6949 the result is valid for MODE. Currently, MODE must be V4SFmode and
6950 SCALE must be SImode. */
6952 spu_gen_exp2 (machine_mode mode, rtx scale)
6954 gcc_assert (mode == V4SFmode);
6955 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6956 if (GET_CODE (scale) != CONST_INT)
6958 /* unsigned int exp = (127 + scale) << 23;
6959 __vector float m = (__vector float) spu_splats (exp); */
6960 rtx reg = force_reg (SImode, scale);
6961 rtx exp = gen_reg_rtx (SImode);
6962 rtx mul = gen_reg_rtx (mode);
6963 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6964 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6965 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6966 return mul;
6968 else
6970 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6971 unsigned char arr[16];
6972 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6973 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6974 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6975 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6976 return array_to_constant (mode, arr);
6980 /* After reload, just change the convert into a move instruction
6981 or a dead instruction. */
6982 void
6983 spu_split_convert (rtx ops[])
6985 if (REGNO (ops[0]) == REGNO (ops[1]))
6986 emit_note (NOTE_INSN_DELETED);
6987 else
6989 /* Use TImode always as this might help hard reg copyprop. */
6990 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6991 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6992 emit_insn (gen_move_insn (op0, op1));
6996 void
6997 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6999 fprintf (file, "# profile\n");
7000 fprintf (file, "brsl $75, _mcount\n");
7003 /* Implement targetm.ref_may_alias_errno. */
7004 static bool
7005 spu_ref_may_alias_errno (ao_ref *ref)
7007 tree base = ao_ref_base (ref);
7009 /* With SPU newlib, errno is defined as something like
7010 _impure_data._errno
7011 The default implementation of this target macro does not
7012 recognize such expressions, so special-code for it here. */
7014 if (TREE_CODE (base) == VAR_DECL
7015 && !TREE_STATIC (base)
7016 && DECL_EXTERNAL (base)
7017 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7018 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7019 "_impure_data") == 0
7020 /* _errno is the first member of _impure_data. */
7021 && ref->offset == 0)
7022 return true;
7024 return default_ref_may_alias_errno (ref);
7027 /* Output thunk to FILE that implements a C++ virtual function call (with
7028 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7029 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7030 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7031 relative to the resulting this pointer. */
7033 static void
7034 spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
7035 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
7036 tree function)
7038 rtx op[8];
7040 /* Make sure unwind info is emitted for the thunk if needed. */
7041 final_start_function (emit_barrier (), file, 1);
7043 /* Operand 0 is the target function. */
7044 op[0] = XEXP (DECL_RTL (function), 0);
7046 /* Operand 1 is the 'this' pointer. */
7047 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
7048 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
7049 else
7050 op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);
7052 /* Operands 2/3 are the low/high halfwords of delta. */
7053 op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
7054 op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));
7056 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7057 op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
7058 op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));
7060 /* Operands 6/7 are temporary registers. */
7061 op[6] = gen_rtx_REG (Pmode, 79);
7062 op[7] = gen_rtx_REG (Pmode, 78);
7064 /* Add DELTA to this pointer. */
7065 if (delta)
7067 if (delta >= -0x200 && delta < 0x200)
7068 output_asm_insn ("ai\t%1,%1,%2", op);
7069 else if (delta >= -0x8000 && delta < 0x8000)
7071 output_asm_insn ("il\t%6,%2", op);
7072 output_asm_insn ("a\t%1,%1,%6", op);
7074 else
7076 output_asm_insn ("ilhu\t%6,%3", op);
7077 output_asm_insn ("iohl\t%6,%2", op);
7078 output_asm_insn ("a\t%1,%1,%6", op);
7082 /* Perform vcall adjustment. */
7083 if (vcall_offset)
7085 output_asm_insn ("lqd\t%7,0(%1)", op);
7086 output_asm_insn ("rotqby\t%7,%7,%1", op);
7088 if (vcall_offset >= -0x200 && vcall_offset < 0x200)
7089 output_asm_insn ("ai\t%7,%7,%4", op);
7090 else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
7092 output_asm_insn ("il\t%6,%4", op);
7093 output_asm_insn ("a\t%7,%7,%6", op);
7095 else
7097 output_asm_insn ("ilhu\t%6,%5", op);
7098 output_asm_insn ("iohl\t%6,%4", op);
7099 output_asm_insn ("a\t%7,%7,%6", op);
7102 output_asm_insn ("lqd\t%6,0(%7)", op);
7103 output_asm_insn ("rotqby\t%6,%6,%7", op);
7104 output_asm_insn ("a\t%1,%1,%6", op);
7107 /* Jump to target. */
7108 output_asm_insn ("br\t%0", op);
7110 final_end_function ();
7113 /* Canonicalize a comparison from one we don't have to one we do have. */
7114 static void
7115 spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
7116 bool op0_preserve_value)
7118 if (!op0_preserve_value
7119 && (*code == LE || *code == LT || *code == LEU || *code == LTU))
7121 rtx tem = *op0;
7122 *op0 = *op1;
7123 *op1 = tem;
7124 *code = (int)swap_condition ((enum rtx_code)*code);
7128 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7129 to perform. MEM is the memory on which to operate. VAL is the second
7130 operand of the binary operator. BEFORE and AFTER are optional locations to
7131 return the value of MEM either before of after the operation. */
7132 void
7133 spu_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
7134 rtx orig_before, rtx orig_after)
7136 machine_mode mode = GET_MODE (mem);
7137 rtx before = orig_before, after = orig_after;
7139 if (before == NULL_RTX)
7140 before = gen_reg_rtx (mode);
7142 emit_move_insn (before, mem);
7144 if (code == MULT) /* NAND operation */
7146 rtx x = expand_simple_binop (mode, AND, before, val,
7147 NULL_RTX, 1, OPTAB_LIB_WIDEN);
7148 after = expand_simple_unop (mode, NOT, x, after, 1);
7150 else
7152 after = expand_simple_binop (mode, code, before, val,
7153 after, 1, OPTAB_LIB_WIDEN);
7156 emit_move_insn (mem, after);
7158 if (orig_after && after != orig_after)
7159 emit_move_insn (orig_after, after);
7162 /* Implement TARGET_MODES_TIEABLE_P. */
7164 static bool
7165 spu_modes_tieable_p (machine_mode mode1, machine_mode mode2)
7167 return (GET_MODE_BITSIZE (mode1) <= MAX_FIXED_MODE_SIZE
7168 && GET_MODE_BITSIZE (mode2) <= MAX_FIXED_MODE_SIZE);
7171 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. GCC assumes that modes are
7172 in the lowpart of a register, which is only true for SPU. */
7174 static bool
7175 spu_can_change_mode_class (machine_mode from, machine_mode to, reg_class_t)
7177 return (GET_MODE_SIZE (from) == GET_MODE_SIZE (to)
7178 || (GET_MODE_SIZE (from) <= 4 && GET_MODE_SIZE (to) <= 4)
7179 || (GET_MODE_SIZE (from) >= 16 && GET_MODE_SIZE (to) >= 16));
7182 /* Implement TARGET_TRULY_NOOP_TRUNCATION. */
7184 static bool
7185 spu_truly_noop_truncation (unsigned int outprec, unsigned int inprec)
7187 return inprec <= 32 && outprec <= inprec;
7190 /* Implement TARGET_STATIC_RTX_ALIGNMENT.
7192 Make all static objects 16-byte aligned. This allows us to assume
7193 they are also padded to 16 bytes, which means we can use a single
7194 load or store instruction to access them. */
7196 static HOST_WIDE_INT
7197 spu_static_rtx_alignment (machine_mode mode)
7199 return MAX (GET_MODE_ALIGNMENT (mode), 128);
7202 /* Implement TARGET_CONSTANT_ALIGNMENT.
7204 Make all static objects 16-byte aligned. This allows us to assume
7205 they are also padded to 16 bytes, which means we can use a single
7206 load or store instruction to access them. */
7208 static HOST_WIDE_INT
7209 spu_constant_alignment (const_tree, HOST_WIDE_INT align)
7211 return MAX (align, 128);
7214 /* Table of machine attributes. */
7215 static const struct attribute_spec spu_attribute_table[] =
7217 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7218 affects_type_identity } */
7219 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute,
7220 false },
7221 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute,
7222 false },
7223 { NULL, 0, 0, false, false, false, NULL, false }
7226 /* TARGET overrides. */
7228 #undef TARGET_LRA_P
7229 #define TARGET_LRA_P hook_bool_void_false
7231 #undef TARGET_ADDR_SPACE_POINTER_MODE
7232 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7234 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7235 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7237 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7238 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7239 spu_addr_space_legitimate_address_p
7241 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7242 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7244 #undef TARGET_ADDR_SPACE_SUBSET_P
7245 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7247 #undef TARGET_ADDR_SPACE_CONVERT
7248 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7250 #undef TARGET_INIT_BUILTINS
7251 #define TARGET_INIT_BUILTINS spu_init_builtins
7252 #undef TARGET_BUILTIN_DECL
7253 #define TARGET_BUILTIN_DECL spu_builtin_decl
7255 #undef TARGET_EXPAND_BUILTIN
7256 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7258 #undef TARGET_UNWIND_WORD_MODE
7259 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7261 #undef TARGET_LEGITIMIZE_ADDRESS
7262 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7264 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7265 and .quad for the debugger. When it is known that the assembler is fixed,
7266 these can be removed. */
7267 #undef TARGET_ASM_UNALIGNED_SI_OP
7268 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7270 #undef TARGET_ASM_ALIGNED_DI_OP
7271 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7273 /* The .8byte directive doesn't seem to work well for a 32 bit
7274 architecture. */
7275 #undef TARGET_ASM_UNALIGNED_DI_OP
7276 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7278 #undef TARGET_RTX_COSTS
7279 #define TARGET_RTX_COSTS spu_rtx_costs
7281 #undef TARGET_ADDRESS_COST
7282 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7284 #undef TARGET_SCHED_ISSUE_RATE
7285 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7287 #undef TARGET_SCHED_INIT_GLOBAL
7288 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7290 #undef TARGET_SCHED_INIT
7291 #define TARGET_SCHED_INIT spu_sched_init
7293 #undef TARGET_SCHED_VARIABLE_ISSUE
7294 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7296 #undef TARGET_SCHED_REORDER
7297 #define TARGET_SCHED_REORDER spu_sched_reorder
7299 #undef TARGET_SCHED_REORDER2
7300 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7302 #undef TARGET_SCHED_ADJUST_COST
7303 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7305 #undef TARGET_ATTRIBUTE_TABLE
7306 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7308 #undef TARGET_ASM_INTEGER
7309 #define TARGET_ASM_INTEGER spu_assemble_integer
7311 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7312 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7314 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7315 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7317 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7318 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7320 #undef TARGET_ASM_GLOBALIZE_LABEL
7321 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7323 #undef TARGET_PASS_BY_REFERENCE
7324 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7326 #undef TARGET_FUNCTION_ARG
7327 #define TARGET_FUNCTION_ARG spu_function_arg
7329 #undef TARGET_FUNCTION_ARG_ADVANCE
7330 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7332 #undef TARGET_FUNCTION_ARG_OFFSET
7333 #define TARGET_FUNCTION_ARG_OFFSET spu_function_arg_offset
7335 #undef TARGET_FUNCTION_ARG_PADDING
7336 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7338 #undef TARGET_MUST_PASS_IN_STACK
7339 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7341 #undef TARGET_BUILD_BUILTIN_VA_LIST
7342 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7344 #undef TARGET_EXPAND_BUILTIN_VA_START
7345 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7347 #undef TARGET_SETUP_INCOMING_VARARGS
7348 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7350 #undef TARGET_MACHINE_DEPENDENT_REORG
7351 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7353 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7354 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7356 #undef TARGET_INIT_LIBFUNCS
7357 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7359 #undef TARGET_RETURN_IN_MEMORY
7360 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7362 #undef TARGET_ENCODE_SECTION_INFO
7363 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7365 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7366 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7368 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7369 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7371 #undef TARGET_VECTORIZE_INIT_COST
7372 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7374 #undef TARGET_VECTORIZE_ADD_STMT_COST
7375 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7377 #undef TARGET_VECTORIZE_FINISH_COST
7378 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7380 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7381 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7383 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7384 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7386 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7387 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7389 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7390 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7392 #undef TARGET_SCHED_SMS_RES_MII
7393 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7395 #undef TARGET_SECTION_TYPE_FLAGS
7396 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7398 #undef TARGET_ASM_SELECT_SECTION
7399 #define TARGET_ASM_SELECT_SECTION spu_select_section
7401 #undef TARGET_ASM_UNIQUE_SECTION
7402 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7404 #undef TARGET_LEGITIMATE_ADDRESS_P
7405 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7407 #undef TARGET_LEGITIMATE_CONSTANT_P
7408 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7410 #undef TARGET_TRAMPOLINE_INIT
7411 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7413 #undef TARGET_WARN_FUNC_RETURN
7414 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7416 #undef TARGET_OPTION_OVERRIDE
7417 #define TARGET_OPTION_OVERRIDE spu_option_override
7419 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7420 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7422 #undef TARGET_REF_MAY_ALIAS_ERRNO
7423 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7425 #undef TARGET_ASM_OUTPUT_MI_THUNK
7426 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7427 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7428 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7430 /* Variable tracking should be run after all optimizations which
7431 change order of insns. It also needs a valid CFG. */
7432 #undef TARGET_DELAY_VARTRACK
7433 #define TARGET_DELAY_VARTRACK true
7435 #undef TARGET_CANONICALIZE_COMPARISON
7436 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7438 #undef TARGET_CAN_USE_DOLOOP_P
7439 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7441 #undef TARGET_MODES_TIEABLE_P
7442 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7444 #undef TARGET_HARD_REGNO_NREGS
7445 #define TARGET_HARD_REGNO_NREGS spu_hard_regno_nregs
7447 #undef TARGET_CAN_CHANGE_MODE_CLASS
7448 #define TARGET_CAN_CHANGE_MODE_CLASS spu_can_change_mode_class
7450 #undef TARGET_TRULY_NOOP_TRUNCATION
7451 #define TARGET_TRULY_NOOP_TRUNCATION spu_truly_noop_truncation
7453 #undef TARGET_STATIC_RTX_ALIGNMENT
7454 #define TARGET_STATIC_RTX_ALIGNMENT spu_static_rtx_alignment
7455 #undef TARGET_CONSTANT_ALIGNMENT
7456 #define TARGET_CONSTANT_ALIGNMENT spu_constant_alignment
7458 struct gcc_target targetm = TARGET_INITIALIZER;
7460 #include "gt-spu.h"