/* Copyright (C) 2006-2017 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
19 #include "coretypes.h"
30 #include "stringpool.h"
37 #include "diagnostic-core.h"
38 #include "insn-attr.h"
40 #include "fold-const.h"
41 #include "stor-layout.h"
49 #include "langhooks.h"
51 #include "sched-int.h"
54 #include "tm-constrs.h"
60 /* This file should be included last. */
61 #include "target-def.h"
63 /* Builtin types, data and prototypes. */
65 enum spu_builtin_type_index
67 SPU_BTI_END_OF_PARAMS
,
69 /* We create new type nodes for these. */
81 /* A 16-byte type. (Implemented with V16QI_type_node) */
84 /* These all correspond to intSI_type_node */
98 /* These correspond to the standard types */
118 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
119 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
120 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
121 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
122 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
123 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
124 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
125 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
126 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
127 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
129 static GTY(()) tree spu_builtin_types
[SPU_BTI_MAX
];
/* Inclusive [low, high] value ranges for each SPU builtin immediate-operand
   class (7-bit, signed 10-bit, unsigned 16-bit, etc.), indexed to match the
   SPU_BTI_* range constants named in the trailing comments.
   NOTE(review): this extraction is missing the struct body (presumably a
   low/high HOST_WIDE_INT pair) and the table's closing brace — confirm
   against the original file.  Code below left byte-identical.  */
131 struct spu_builtin_range
136 static struct spu_builtin_range spu_builtin_range
[] = {
137 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
138 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
139 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
140 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
141 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
142 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
143 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
144 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
145 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
146 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
147 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
148 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
152 /* Target specific attribute specifications. */
153 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
155 /* Prototypes and external defs. */
156 static int get_pipe (rtx_insn
*insn
);
157 static int spu_naked_function_p (tree func
);
158 static int mem_is_padded_component_ref (rtx x
);
159 static void fix_range (const char *);
160 static rtx
spu_expand_load (rtx
, rtx
, rtx
, int);
162 /* Which instruction set architecture to use. */
164 /* Which cpu are we tuning for. */
167 /* The hardware requires 8 insns between a hint and the branch it
168 effects. This variable describes how many rtl instructions the
169 compiler needs to see before inserting a hint, and then the compiler
170 will insert enough nops to make it at least 8 insns. The default is
171 for the compiler to allow up to 2 nops be emitted. The nops are
172 inserted in pairs, so we round down. */
173 int spu_hint_dist
= (8*4) - (2*4);
188 IC_POOL
, /* constant pool */
189 IC_IL1
, /* one il* instruction */
190 IC_IL2
, /* both ilhu and iohl instructions */
191 IC_IL1s
, /* one il* instruction */
192 IC_IL2s
, /* both ilhu and iohl instructions */
193 IC_FSMBI
, /* the fsmbi instruction */
194 IC_CPAT
, /* one of the c*d instructions */
195 IC_FSMBI2
/* fsmbi plus 1 other instruction */
198 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
199 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
200 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
201 static enum immediate_class
classify_immediate (rtx op
,
204 /* Pointer mode for __ea references. */
205 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
208 /* Define the structure for the machine field in struct function. */
209 struct GTY(()) machine_function
211 /* Register to use for PIC accesses. */
215 /* How to allocate a 'struct machine_function'. */
/* Returns a zero-initialized, garbage-collected machine_function; installed
   as init_machine_status in spu_option_override below.
   NOTE(review): braces elided by this extraction; code left byte-identical. */
216 static struct machine_function
*
217 spu_init_machine_status (void)
219 return ggc_cleared_alloc
<machine_function
> ();
222 /* Implement TARGET_OPTION_OVERRIDE. */
/* Validates -march=/-mtune= (accepting "cell" and "celledp"), adjusts
   unrolling/peeling params, hint distance, fixed-register ranges, and the
   SFmode real format for the SPU target.
   NOTE(review): this extraction is missing the function's return type,
   braces and several interior statements (embedded original-line numbers
   jump); code left byte-identical.  */
224 spu_option_override (void)
226 /* Set up function hooks. */
227 init_machine_status
= spu_init_machine_status
;
229 /* Small loops will be unpeeled at -O3. For SPU it is more important
230 to keep code small by default. */
231 if (!flag_unroll_loops
&& !flag_peel_loops
)
232 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES
, 4,
233 global_options
.x_param_values
,
234 global_options_set
.x_param_values
);
236 flag_omit_frame_pointer
= 1;
238 /* Functions must be 8 byte aligned so we correctly handle dual issue */
239 if (align_functions
< 8)
/* Hint distance derives from -mmax-nops: 8 insns (4 bytes each) minus the
   allowed nop budget.  */
242 spu_hint_dist
= 8*4 - spu_max_nops
*4;
243 if (spu_hint_dist
< 0)
246 if (spu_fixed_range_string
)
247 fix_range (spu_fixed_range_string
);
249 /* Determine processor architectural level. */
252 if (strcmp (&spu_arch_string
[0], "cell") == 0)
253 spu_arch
= PROCESSOR_CELL
;
254 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
255 spu_arch
= PROCESSOR_CELLEDP
;
257 error ("bad value (%s) for -march= switch", spu_arch_string
);
260 /* Determine processor to tune for. */
263 if (strcmp (&spu_tune_string
[0], "cell") == 0)
264 spu_tune
= PROCESSOR_CELL
;
265 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
266 spu_tune
= PROCESSOR_CELLEDP
;
268 error ("bad value (%s) for -mtune= switch", spu_tune_string
);
271 /* Change defaults according to the processor architecture. */
272 if (spu_arch
== PROCESSOR_CELLEDP
)
274 /* If no command line option has been otherwise specified, change
275 the default to -mno-safe-hints on celledp -- only the original
276 Cell/B.E. processors require this workaround. */
277 if (!(target_flags_explicit
& MASK_SAFE_HINTS
))
278 target_flags
&= ~MASK_SAFE_HINTS
;
281 REAL_MODE_FORMAT (SFmode
) = &spu_single_format
;
284 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
285 struct attribute_spec.handler. */
287 /* True if MODE is valid for the target. By "valid", we mean able to
288 be manipulated in non-trivial ways. In particular, this means all
289 the arithmetic is supported. */
291 spu_scalar_mode_supported_p (scalar_mode mode
)
309 /* Similarly for vector modes. "Supported" here is less strict. At
310 least some operations are supported; need to check optabs or builtins
311 for further details. */
313 spu_vector_mode_supported_p (machine_mode mode
)
330 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
331 least significant bytes of the outer mode. This function returns
332 TRUE for the SUBREG's where this is correct. */
/* A SUBREG is valid here when both modes are non-void and their sizes are
   equal, both at most 4 bytes, or both at least 16 bytes.
   NOTE(review): return type and braces elided by this extraction; code
   left byte-identical.  */
334 valid_subreg (rtx op
)
336 machine_mode om
= GET_MODE (op
);
337 machine_mode im
= GET_MODE (SUBREG_REG (op
));
338 return om
!= VOIDmode
&& im
!= VOIDmode
339 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
340 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4)
341 || (GET_MODE_SIZE (im
) >= 16 && GET_MODE_SIZE (om
) >= 16));
344 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
345 and adjust the start offset. */
/* Strips a paradoxical SUBREG from OP (adjusting *START by the dropped
   bits), widens sub-SI operands so *START is relative to a 32-bit word,
   and wraps non-MODE_INT operands in an integer-mode SUBREG.
   NOTE(review): several interior lines (return type, braces, *start
   adjustment on the paradoxical path, the final return) are elided by this
   extraction; code left byte-identical.  */
347 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
351 /* Strip any paradoxical SUBREG. */
352 if (GET_CODE (op
) == SUBREG
353 && (GET_MODE_BITSIZE (GET_MODE (op
))
354 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)))))
358 GET_MODE_BITSIZE (GET_MODE (op
)) -
359 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
360 op
= SUBREG_REG (op
);
362 /* If it is smaller than SI, assure a SUBREG */
363 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
367 *start
+= 32 - op_size
;
370 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
371 mode
= mode_for_size (op_size
, MODE_INT
, 0);
372 if (mode
!= GET_MODE (op
))
373 op
= gen_rtx_SUBREG (mode
, op
, 0);
378 spu_expand_extv (rtx ops
[], int unsignedp
)
380 rtx dst
= ops
[0], src
= ops
[1];
381 HOST_WIDE_INT width
= INTVAL (ops
[2]);
382 HOST_WIDE_INT start
= INTVAL (ops
[3]);
383 HOST_WIDE_INT align_mask
;
384 rtx s0
, s1
, mask
, r0
;
386 gcc_assert (REG_P (dst
) && GET_MODE (dst
) == TImode
);
390 /* First, determine if we need 1 TImode load or 2. We need only 1
391 if the bits being extracted do not cross the alignment boundary
392 as determined by the MEM and its address. */
394 align_mask
= -MEM_ALIGN (src
);
395 if ((start
& align_mask
) == ((start
+ width
- 1) & align_mask
))
397 /* Alignment is sufficient for 1 load. */
398 s0
= gen_reg_rtx (TImode
);
399 r0
= spu_expand_load (s0
, 0, src
, start
/ 8);
402 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
407 s0
= gen_reg_rtx (TImode
);
408 s1
= gen_reg_rtx (TImode
);
409 r0
= spu_expand_load (s0
, s1
, src
, start
/ 8);
412 gcc_assert (start
+ width
<= 128);
415 rtx r1
= gen_reg_rtx (SImode
);
416 mask
= gen_reg_rtx (TImode
);
417 emit_move_insn (mask
, GEN_INT (-1));
418 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
419 emit_insn (gen_rotqby_ti (s1
, s1
, r0
));
420 if (GET_CODE (r0
) == CONST_INT
)
421 r1
= GEN_INT (INTVAL (r0
) & 15);
423 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (15)));
424 emit_insn (gen_shlqby_ti (mask
, mask
, r1
));
425 emit_insn (gen_selb (s0
, s1
, s0
, mask
));
430 else if (GET_CODE (src
) == SUBREG
)
432 rtx r
= SUBREG_REG (src
);
433 gcc_assert (REG_P (r
) && SCALAR_INT_MODE_P (GET_MODE (r
)));
434 s0
= gen_reg_rtx (TImode
);
435 if (GET_MODE_SIZE (GET_MODE (r
)) < GET_MODE_SIZE (TImode
))
436 emit_insn (gen_rtx_SET (s0
, gen_rtx_ZERO_EXTEND (TImode
, r
)));
438 emit_move_insn (s0
, src
);
442 gcc_assert (REG_P (src
) && GET_MODE (src
) == TImode
);
443 s0
= gen_reg_rtx (TImode
);
444 emit_move_insn (s0
, src
);
447 /* Now s0 is TImode and contains the bits to extract at start. */
450 emit_insn (gen_rotlti3 (s0
, s0
, GEN_INT (start
)));
453 s0
= expand_shift (RSHIFT_EXPR
, TImode
, s0
, 128 - width
, s0
, unsignedp
);
455 emit_move_insn (dst
, s0
);
459 spu_expand_insv (rtx ops
[])
461 HOST_WIDE_INT width
= INTVAL (ops
[1]);
462 HOST_WIDE_INT start
= INTVAL (ops
[2]);
463 unsigned HOST_WIDE_INT maskbits
;
464 machine_mode dst_mode
;
465 rtx dst
= ops
[0], src
= ops
[3];
472 if (GET_CODE (ops
[0]) == MEM
)
473 dst
= gen_reg_rtx (TImode
);
475 dst
= adjust_operand (dst
, &start
);
476 dst_mode
= GET_MODE (dst
);
477 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
479 if (CONSTANT_P (src
))
482 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
483 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
485 src
= adjust_operand (src
, 0);
487 mask
= gen_reg_rtx (dst_mode
);
488 shift_reg
= gen_reg_rtx (dst_mode
);
489 shift
= dst_size
- start
- width
;
491 /* It's not safe to use subreg here because the compiler assumes
492 that the SUBREG_REG is right justified in the SUBREG. */
493 convert_move (shift_reg
, src
, 1);
500 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
503 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
506 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
518 maskbits
= (~(unsigned HOST_WIDE_INT
)0 << (32 - width
- start
));
520 maskbits
+= ((unsigned HOST_WIDE_INT
)1 << (32 - start
));
521 emit_move_insn (mask
, GEN_INT (maskbits
));
524 maskbits
= (~(unsigned HOST_WIDE_INT
)0 << (64 - width
- start
));
526 maskbits
+= ((unsigned HOST_WIDE_INT
)1 << (64 - start
));
527 emit_move_insn (mask
, GEN_INT (maskbits
));
531 unsigned char arr
[16];
533 memset (arr
, 0, sizeof (arr
));
534 arr
[i
] = 0xff >> (start
& 7);
535 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
537 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
538 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
544 if (GET_CODE (ops
[0]) == MEM
)
546 rtx low
= gen_reg_rtx (SImode
);
547 rtx rotl
= gen_reg_rtx (SImode
);
548 rtx mask0
= gen_reg_rtx (TImode
);
554 addr
= force_reg (Pmode
, XEXP (ops
[0], 0));
555 addr0
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
556 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
557 emit_insn (gen_negsi2 (rotl
, low
));
558 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
559 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
560 mem
= change_address (ops
[0], TImode
, addr0
);
561 set_mem_alias_set (mem
, 0);
562 emit_move_insn (dst
, mem
);
563 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
564 if (start
+ width
> MEM_ALIGN (ops
[0]))
566 rtx shl
= gen_reg_rtx (SImode
);
567 rtx mask1
= gen_reg_rtx (TImode
);
568 rtx dst1
= gen_reg_rtx (TImode
);
570 addr1
= plus_constant (Pmode
, addr
, 16);
571 addr1
= gen_rtx_AND (Pmode
, addr1
, GEN_INT (-16));
572 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
573 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
574 mem1
= change_address (ops
[0], TImode
, addr1
);
575 set_mem_alias_set (mem1
, 0);
576 emit_move_insn (dst1
, mem1
);
577 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
578 emit_move_insn (mem1
, dst1
);
580 emit_move_insn (mem
, dst
);
583 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
/* Expands a block move: copies INTVAL(ops[2]) bytes from ops[1] to ops[0]
   (ops[3] is the alignment).  Bails out when the size/alignment are not
   CONST_INT or the copy exceeds MOVE_RATIO * 8 bytes; copies whole 16-byte
   V16QI quadwords first, then merges any <16-byte tail with a selb mask so
   bytes past the end of the region are preserved.
   NOTE(review): return type, braces and some interior lines (including the
   loop that sets the tail-mask bytes to 0xff) are elided by this
   extraction; code left byte-identical.  */
588 spu_expand_block_move (rtx ops
[])
590 HOST_WIDE_INT bytes
, align
, offset
;
591 rtx src
, dst
, sreg
, dreg
, target
;
593 if (GET_CODE (ops
[2]) != CONST_INT
594 || GET_CODE (ops
[3]) != CONST_INT
595 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
598 bytes
= INTVAL (ops
[2]);
599 align
= INTVAL (ops
[3]);
/* Copy as many whole 16-byte quadwords as fit.  */
609 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
611 dst
= adjust_address (ops
[0], V16QImode
, offset
);
612 src
= adjust_address (ops
[1], V16QImode
, offset
);
613 emit_move_insn (dst
, src
);
/* Tail: select the remaining bytes from the source, the rest from the
   original destination contents, via selb.  */
618 unsigned char arr
[16] = { 0 };
619 for (i
= 0; i
< bytes
- offset
; i
++)
621 dst
= adjust_address (ops
[0], V16QImode
, offset
);
622 src
= adjust_address (ops
[1], V16QImode
, offset
);
623 mask
= gen_reg_rtx (V16QImode
);
624 sreg
= gen_reg_rtx (V16QImode
);
625 dreg
= gen_reg_rtx (V16QImode
);
626 target
= gen_reg_rtx (V16QImode
);
627 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
628 emit_move_insn (dreg
, dst
);
629 emit_move_insn (sreg
, src
);
630 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
631 emit_move_insn (dst
, target
);
/* Comparison codes the hardware supports directly (equal, signed greater,
   unsigned greater) and, per mode, the insn code implementing each; a 0
   entry means that comparison does not exist for the mode (e.g. no
   unsigned compare for float modes).  Rows are indexed QI, HI, SI, DI, TI,
   SF, DF, then the vector modes.
   NOTE(review): the enum header for line 639 and the table's closing brace
   are elided by this extraction; code left byte-identical.  */
639 { SPU_EQ
, SPU_GT
, SPU_GTU
};
641 int spu_comp_icode
[12][3] = {
642 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
643 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
644 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
645 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
646 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
647 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
648 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
649 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
650 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
651 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
652 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
653 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
656 /* Generate a compare for CODE. Return a brand-new rtx that represents
657 the result of the compare. GCC can figure this out too if we don't
658 provide all variations of compares, but GCC always wants to use
659 WORD_MODE, we can generate better code in most cases if we do it
662 spu_emit_branch_or_set (int is_set
, rtx cmp
, rtx operands
[])
664 int reverse_compare
= 0;
665 int reverse_test
= 0;
666 rtx compare_result
, eq_result
;
667 rtx comp_rtx
, eq_rtx
;
668 machine_mode comp_mode
;
669 machine_mode op_mode
;
670 enum spu_comp_code scode
, eq_code
;
671 enum insn_code ior_code
;
672 enum rtx_code code
= GET_CODE (cmp
);
673 rtx op0
= XEXP (cmp
, 0);
674 rtx op1
= XEXP (cmp
, 1);
678 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
679 and so on, to keep the constant in operand 1. */
680 if (GET_CODE (op1
) == CONST_INT
)
682 HOST_WIDE_INT val
= INTVAL (op1
) - 1;
683 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
707 /* However, if we generate an integer result, performing a reverse test
708 would require an extra negation, so avoid that where possible. */
709 if (GET_CODE (op1
) == CONST_INT
&& is_set
== 1)
711 HOST_WIDE_INT val
= INTVAL (op1
) + 1;
712 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
729 op_mode
= GET_MODE (op0
);
735 if (HONOR_NANS (op_mode
))
750 if (HONOR_NANS (op_mode
))
842 comp_mode
= V4SImode
;
846 comp_mode
= V2DImode
;
853 if (GET_MODE (op1
) == DFmode
854 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
857 if (is_set
== 0 && op1
== const0_rtx
858 && (GET_MODE (op0
) == SImode
859 || GET_MODE (op0
) == HImode
860 || GET_MODE (op0
) == QImode
) && scode
== SPU_EQ
)
862 /* Don't need to set a register with the result when we are
863 comparing against zero and branching. */
864 reverse_test
= !reverse_test
;
865 compare_result
= op0
;
869 compare_result
= gen_reg_rtx (comp_mode
);
878 if (spu_comp_icode
[index
][scode
] == 0)
881 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
883 op0
= force_reg (op_mode
, op0
);
884 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
886 op1
= force_reg (op_mode
, op1
);
887 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
891 emit_insn (comp_rtx
);
895 eq_result
= gen_reg_rtx (comp_mode
);
896 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
901 ior_code
= optab_handler (ior_optab
, comp_mode
);
902 gcc_assert (ior_code
!= CODE_FOR_nothing
);
903 emit_insn (GEN_FCN (ior_code
)
904 (compare_result
, compare_result
, eq_result
));
913 /* We don't have branch on QI compare insns, so we convert the
914 QI compare result to a HI result. */
915 if (comp_mode
== QImode
)
917 rtx old_res
= compare_result
;
918 compare_result
= gen_reg_rtx (HImode
);
920 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
924 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
926 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
928 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
929 emit_jump_insn (gen_rtx_SET (pc_rtx
,
930 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
933 else if (is_set
== 2)
935 rtx target
= operands
[0];
936 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
937 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
938 machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
940 rtx op_t
= operands
[2];
941 rtx op_f
= operands
[3];
943 /* The result of the comparison can be SI, HI or QI mode. Create a
944 mask based on that result. */
945 if (target_size
> compare_size
)
947 select_mask
= gen_reg_rtx (mode
);
948 emit_insn (gen_extend_compare (select_mask
, compare_result
));
950 else if (target_size
< compare_size
)
952 gen_rtx_SUBREG (mode
, compare_result
,
953 (compare_size
- target_size
) / BITS_PER_UNIT
);
954 else if (comp_mode
!= mode
)
955 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
957 select_mask
= compare_result
;
959 if (GET_MODE (target
) != GET_MODE (op_t
)
960 || GET_MODE (target
) != GET_MODE (op_f
))
964 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
966 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
970 rtx target
= operands
[0];
972 emit_insn (gen_rtx_SET (compare_result
,
973 gen_rtx_NOT (comp_mode
, compare_result
)));
974 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
975 emit_insn (gen_extendhisi2 (target
, compare_result
));
976 else if (GET_MODE (target
) == SImode
977 && GET_MODE (compare_result
) == QImode
)
978 emit_insn (gen_extend_compare (target
, compare_result
));
980 emit_move_insn (target
, compare_result
);
/* Converts a CONST_DOUBLE X (SFmode or DFmode) to its target bit pattern
   as a HOST_WIDE_INT; for DFmode the two 32-bit target words are packed
   high word first.
   NOTE(review): return type, braces and the declarations of val/l are
   elided by this extraction; code left byte-identical.  */
985 const_double_to_hwint (rtx x
)
988 if (GET_MODE (x
) == SFmode
)
989 REAL_VALUE_TO_TARGET_SINGLE (*CONST_DOUBLE_REAL_VALUE (x
), val
);
990 else if (GET_MODE (x
) == DFmode
)
993 REAL_VALUE_TO_TARGET_DOUBLE (*CONST_DOUBLE_REAL_VALUE (x
), l
);
995 val
= (val
<< 32) | (l
[1] & 0xffffffff);
/* Inverse of const_double_to_hwint: builds a CONST_DOUBLE of MODE (SFmode
   or DFmode only, per the assert) from bit pattern V, by splitting V into
   sign-extended 32-bit target words and converting through
   real_from_target.
   NOTE(review): return type, braces, the tv/rv declarations and the SFmode
   branch header are elided by this extraction; code left byte-identical. */
1003 hwint_to_const_double (machine_mode mode
, HOST_WIDE_INT v
)
1007 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1010 tv
[0] = (v
<< 32) >> 32;
1011 else if (mode
== DFmode
)
1013 tv
[1] = (v
<< 32) >> 32;
1016 real_from_target (&rv
, tv
, mode
);
1017 return const_double_from_real_value (rv
, mode
);
/* Prints ADDR to FILE in SPU assembly syntax: strips an (and addr -16)
   quadword-alignment wrapper, then prints "0(reg)" for a bare register,
   "reg,reg" or "offset(reg)" for reg+reg / reg+const sums, and falls back
   to output_addr_const for symbolic addresses.
   NOTE(review): return type, braces and the switch's case labels are
   elided by this extraction; code left byte-identical.  */
1021 print_operand_address (FILE * file
, register rtx addr
)
1026 if (GET_CODE (addr
) == AND
1027 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1028 && INTVAL (XEXP (addr
, 1)) == -16)
1029 addr
= XEXP (addr
, 0);
1031 switch (GET_CODE (addr
))
1034 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1038 reg
= XEXP (addr
, 0);
1039 offset
= XEXP (addr
, 1);
1040 if (GET_CODE (offset
) == REG
)
1042 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1043 reg_names
[REGNO (offset
)]);
1045 else if (GET_CODE (offset
) == CONST_INT
)
1047 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1048 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1058 output_addr_const (file
, addr
);
1068 print_operand (FILE * file
, rtx x
, int code
)
1070 machine_mode mode
= GET_MODE (x
);
1072 unsigned char arr
[16];
1073 int xcode
= GET_CODE (x
);
1075 if (GET_MODE (x
) == VOIDmode
)
1078 case 'L': /* 128 bits, signed */
1079 case 'm': /* 128 bits, signed */
1080 case 'T': /* 128 bits, signed */
1081 case 't': /* 128 bits, signed */
1084 case 'K': /* 64 bits, signed */
1085 case 'k': /* 64 bits, signed */
1086 case 'D': /* 64 bits, signed */
1087 case 'd': /* 64 bits, signed */
1090 case 'J': /* 32 bits, signed */
1091 case 'j': /* 32 bits, signed */
1092 case 's': /* 32 bits, signed */
1093 case 'S': /* 32 bits, signed */
1100 case 'j': /* 32 bits, signed */
1101 case 'k': /* 64 bits, signed */
1102 case 'm': /* 128 bits, signed */
1103 if (xcode
== CONST_INT
1104 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1106 gcc_assert (logical_immediate_p (x
, mode
));
1107 constant_to_array (mode
, x
, arr
);
1108 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1109 val
= trunc_int_for_mode (val
, SImode
);
1110 switch (which_logical_immediate (val
))
1115 fprintf (file
, "h");
1118 fprintf (file
, "b");
1128 case 'J': /* 32 bits, signed */
1129 case 'K': /* 64 bits, signed */
1130 case 'L': /* 128 bits, signed */
1131 if (xcode
== CONST_INT
1132 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1134 gcc_assert (logical_immediate_p (x
, mode
)
1135 || iohl_immediate_p (x
, mode
));
1136 constant_to_array (mode
, x
, arr
);
1137 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1138 val
= trunc_int_for_mode (val
, SImode
);
1139 switch (which_logical_immediate (val
))
1145 val
= trunc_int_for_mode (val
, HImode
);
1148 val
= trunc_int_for_mode (val
, QImode
);
1153 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1159 case 't': /* 128 bits, signed */
1160 case 'd': /* 64 bits, signed */
1161 case 's': /* 32 bits, signed */
1164 enum immediate_class c
= classify_immediate (x
, mode
);
1168 constant_to_array (mode
, x
, arr
);
1169 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1170 val
= trunc_int_for_mode (val
, SImode
);
1171 switch (which_immediate_load (val
))
1176 fprintf (file
, "a");
1179 fprintf (file
, "h");
1182 fprintf (file
, "hu");
1189 constant_to_array (mode
, x
, arr
);
1190 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1192 fprintf (file
, "b");
1194 fprintf (file
, "h");
1196 fprintf (file
, "w");
1198 fprintf (file
, "d");
1201 if (xcode
== CONST_VECTOR
)
1203 x
= CONST_VECTOR_ELT (x
, 0);
1204 xcode
= GET_CODE (x
);
1206 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1207 fprintf (file
, "a");
1208 else if (xcode
== HIGH
)
1209 fprintf (file
, "hu");
1223 case 'T': /* 128 bits, signed */
1224 case 'D': /* 64 bits, signed */
1225 case 'S': /* 32 bits, signed */
1228 enum immediate_class c
= classify_immediate (x
, mode
);
1232 constant_to_array (mode
, x
, arr
);
1233 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1234 val
= trunc_int_for_mode (val
, SImode
);
1235 switch (which_immediate_load (val
))
1242 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1247 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1250 constant_to_array (mode
, x
, arr
);
1252 for (i
= 0; i
< 16; i
++)
1257 print_operand (file
, GEN_INT (val
), 0);
1260 constant_to_array (mode
, x
, arr
);
1261 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1262 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1267 if (GET_CODE (x
) == CONST_VECTOR
)
1268 x
= CONST_VECTOR_ELT (x
, 0);
1269 output_addr_const (file
, x
);
1271 fprintf (file
, "@h");
1285 if (xcode
== CONST_INT
)
1287 /* Only 4 least significant bits are relevant for generate
1288 control word instructions. */
1289 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1294 case 'M': /* print code for c*d */
1295 if (GET_CODE (x
) == CONST_INT
)
1299 fprintf (file
, "b");
1302 fprintf (file
, "h");
1305 fprintf (file
, "w");
1308 fprintf (file
, "d");
1317 case 'N': /* Negate the operand */
1318 if (xcode
== CONST_INT
)
1319 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1320 else if (xcode
== CONST_VECTOR
)
1321 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1322 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1325 case 'I': /* enable/disable interrupts */
1326 if (xcode
== CONST_INT
)
1327 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1330 case 'b': /* branch modifiers */
1332 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1333 else if (COMPARISON_P (x
))
1334 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1337 case 'i': /* indirect call */
1340 if (GET_CODE (XEXP (x
, 0)) == REG
)
1341 /* Used in indirect function calls. */
1342 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1344 output_address (GET_MODE (x
), XEXP (x
, 0));
1348 case 'p': /* load/store */
1352 xcode
= GET_CODE (x
);
1357 xcode
= GET_CODE (x
);
1360 fprintf (file
, "d");
1361 else if (xcode
== CONST_INT
)
1362 fprintf (file
, "a");
1363 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1364 fprintf (file
, "r");
1365 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1367 if (GET_CODE (XEXP (x
, 1)) == REG
)
1368 fprintf (file
, "x");
1370 fprintf (file
, "d");
1375 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1377 output_addr_const (file
, GEN_INT (val
));
1381 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1383 output_addr_const (file
, GEN_INT (val
));
1387 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1389 output_addr_const (file
, GEN_INT (val
));
1393 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1394 val
= (val
>> 3) & 0x1f;
1395 output_addr_const (file
, GEN_INT (val
));
1399 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1402 output_addr_const (file
, GEN_INT (val
));
1406 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1409 output_addr_const (file
, GEN_INT (val
));
1413 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1416 output_addr_const (file
, GEN_INT (val
));
1420 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1421 val
= -(val
& -8ll);
1422 val
= (val
>> 3) & 0x1f;
1423 output_addr_const (file
, GEN_INT (val
));
1428 constant_to_array (mode
, x
, arr
);
1429 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1430 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1435 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1436 else if (xcode
== MEM
)
1437 output_address (GET_MODE (x
), XEXP (x
, 0));
1438 else if (xcode
== CONST_VECTOR
)
1439 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1441 output_addr_const (file
, x
);
1448 output_operand_lossage ("invalid %%xn code");
1453 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1454 caller saved register. For leaf functions it is more efficient to
1455 use a volatile register because we won't need to save and restore the
1456 pic register. This routine is only valid after register allocation
1457 is completed, so we can pick an unused register. */
/* Picks and caches the PIC register for the current function: LAST_ARG_REGNUM
   for leaf functions where it is unused, otherwise pic_offset_table_rtx.
   The choice is memoized in cfun->machine->pic_reg.
   NOTE(review): the function signature and braces are elided by this
   extraction; code left byte-identical.  */
1461 if (!reload_completed
&& !reload_in_progress
)
1464 /* If we've already made the decision, we need to keep with it. Once we've
1465 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1466 return true since the register is now live; this should not cause us to
1467 "switch back" to using pic_offset_table_rtx. */
1468 if (!cfun
->machine
->pic_reg
)
1470 if (crtl
->is_leaf
&& !df_regs_ever_live_p (LAST_ARG_REGNUM
))
1471 cfun
->machine
->pic_reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
1473 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1476 return cfun
->machine
->pic_reg
;
1479 /* Split constant addresses to handle cases that are too large.
1480 Add in the pic register when in PIC mode.
1481 Split immediates that require more than 1 instruction. */
1483 spu_split_immediate (rtx
* ops
)
1485 machine_mode mode
= GET_MODE (ops
[0]);
1486 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1492 unsigned char arrhi
[16];
1493 unsigned char arrlo
[16];
1494 rtx to
, temp
, hi
, lo
;
1496 /* We need to do reals as ints because the constant used in the
1497 IOR might not be a legitimate real constant. */
1498 scalar_int_mode imode
= int_mode_for_mode (mode
).require ();
1499 constant_to_array (mode
, ops
[1], arrhi
);
1501 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1504 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1505 for (i
= 0; i
< 16; i
+= 4)
1507 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1508 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1509 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1510 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1512 hi
= array_to_constant (imode
, arrhi
);
1513 lo
= array_to_constant (imode
, arrlo
);
1514 emit_move_insn (temp
, hi
);
1515 emit_insn (gen_rtx_SET (to
, gen_rtx_IOR (imode
, temp
, lo
)));
1520 unsigned char arr_fsmbi
[16];
1521 unsigned char arr_andbi
[16];
1522 rtx to
, reg_fsmbi
, reg_and
;
1524 /* We need to do reals as ints because the constant used in the
1525 * AND might not be a legitimate real constant. */
1526 scalar_int_mode imode
= int_mode_for_mode (mode
).require ();
1527 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1529 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1532 for (i
= 0; i
< 16; i
++)
1533 if (arr_fsmbi
[i
] != 0)
1535 arr_andbi
[0] = arr_fsmbi
[i
];
1536 arr_fsmbi
[i
] = 0xff;
1538 for (i
= 1; i
< 16; i
++)
1539 arr_andbi
[i
] = arr_andbi
[0];
1540 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1541 reg_and
= array_to_constant (imode
, arr_andbi
);
1542 emit_move_insn (to
, reg_fsmbi
);
1543 emit_insn (gen_rtx_SET (to
, gen_rtx_AND (imode
, to
, reg_and
)));
1547 if (reload_in_progress
|| reload_completed
)
1549 rtx mem
= force_const_mem (mode
, ops
[1]);
1550 if (TARGET_LARGE_MEM
)
1552 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1553 emit_move_insn (addr
, XEXP (mem
, 0));
1554 mem
= replace_equiv_address (mem
, addr
);
1556 emit_move_insn (ops
[0], mem
);
1562 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1566 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1567 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1570 emit_insn (gen_pic (ops
[0], ops
[1]));
1573 rtx pic_reg
= get_pic_reg ();
1574 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1576 return flag_pic
|| c
== IC_IL2s
;
1587 /* SAVING is TRUE when we are generating the actual load and store
1588 instructions for REGNO. When determining the size of the stack
1589 needed for saving register we must allocate enough space for the
1590 worst case, because we don't always have the information early enough
1591 to not allocate it. But we can at least eliminate the actual loads
1592 and stores during the prologue/epilogue. */
/* Returns nonzero when REGNO must be saved: live and not call-used, with a
   special case for the PIC register.
   NOTE(review): return type, braces and the flag_pic condition preceding
   line 1599 are elided by this extraction; code left byte-identical.  */
1594 need_to_save_reg (int regno
, int saving
)
1596 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1599 && regno
== PIC_OFFSET_TABLE_REGNUM
1600 && (!saving
|| cfun
->machine
->pic_reg
== pic_offset_table_rtx
))
1605 /* This function is only correct starting with local register
1608 spu_saved_regs_size (void)
1610 int reg_save_size
= 0;
1613 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1614 if (need_to_save_reg (regno
, 0))
1615 reg_save_size
+= 0x10;
1616 return reg_save_size
;
1620 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1622 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1624 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1625 return emit_insn (gen_movv4si (mem
, reg
));
1629 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1631 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1633 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1634 return emit_insn (gen_movv4si (reg
, mem
));
1637 /* This happens after reload, so we need to expand it. */
1639 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1642 if (satisfies_constraint_K (GEN_INT (imm
)))
1644 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1648 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1649 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1650 if (REGNO (src
) == REGNO (scratch
))
1656 /* Return nonzero if this function is known to have a null epilogue. */
1659 direct_return (void)
1661 if (reload_completed
)
1663 if (cfun
->static_chain_decl
== 0
1664 && (spu_saved_regs_size ()
1666 + crtl
->outgoing_args_size
1667 + crtl
->args
.pretend_args_size
== 0)
1675 The stack frame looks like this:
1679 AP -> +-------------+
1682 prev SP | back chain |
1685 | reg save | crtl->args.pretend_args_size bytes
1688 | saved regs | spu_saved_regs_size() bytes
1689 FP -> +-------------+
1691 | vars | get_frame_size() bytes
1692 HFP -> +-------------+
1695 | args | crtl->outgoing_args_size bytes
1701 SP -> +-------------+
1705 spu_expand_prologue (void)
1707 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1708 HOST_WIDE_INT total_size
;
1709 HOST_WIDE_INT saved_regs_size
;
1710 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1711 rtx scratch_reg_0
, scratch_reg_1
;
1715 if (flag_pic
&& optimize
== 0 && !cfun
->machine
->pic_reg
)
1716 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1718 if (spu_naked_function_p (current_function_decl
))
1721 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1722 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1724 saved_regs_size
= spu_saved_regs_size ();
1725 total_size
= size
+ saved_regs_size
1726 + crtl
->outgoing_args_size
1727 + crtl
->args
.pretend_args_size
;
1730 || cfun
->calls_alloca
|| total_size
> 0)
1731 total_size
+= STACK_POINTER_OFFSET
;
1733 /* Save this first because code after this might use the link
1734 register as a scratch register. */
1737 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1738 RTX_FRAME_RELATED_P (insn
) = 1;
1743 offset
= -crtl
->args
.pretend_args_size
;
1744 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1745 if (need_to_save_reg (regno
, 1))
1748 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1749 RTX_FRAME_RELATED_P (insn
) = 1;
1753 if (flag_pic
&& cfun
->machine
->pic_reg
)
1755 rtx pic_reg
= cfun
->machine
->pic_reg
;
1756 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1757 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1762 if (flag_stack_check
)
1764 /* We compare against total_size-1 because
1765 ($sp >= total_size) <=> ($sp > total_size-1) */
1766 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1767 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1768 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1769 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1771 emit_move_insn (scratch_v4si
, size_v4si
);
1772 size_v4si
= scratch_v4si
;
1774 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1775 emit_insn (gen_vec_extractv4sisi
1776 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1777 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1780 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1781 the value of the previous $sp because we save it as the back
1783 if (total_size
<= 2000)
1785 /* In this case we save the back chain first. */
1786 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1788 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1792 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1794 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1796 RTX_FRAME_RELATED_P (insn
) = 1;
1797 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1798 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1800 if (total_size
> 2000)
1802 /* Save the back chain ptr */
1803 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1806 if (frame_pointer_needed
)
1808 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1809 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1810 + crtl
->outgoing_args_size
;
1811 /* Set the new frame_pointer */
1812 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1813 RTX_FRAME_RELATED_P (insn
) = 1;
1814 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1815 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1816 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1820 if (flag_stack_usage_info
)
1821 current_function_static_stack_size
= total_size
;
1825 spu_expand_epilogue (bool sibcall_p
)
1827 int size
= get_frame_size (), offset
, regno
;
1828 HOST_WIDE_INT saved_regs_size
, total_size
;
1829 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1832 if (spu_naked_function_p (current_function_decl
))
1835 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1837 saved_regs_size
= spu_saved_regs_size ();
1838 total_size
= size
+ saved_regs_size
1839 + crtl
->outgoing_args_size
1840 + crtl
->args
.pretend_args_size
;
1843 || cfun
->calls_alloca
|| total_size
> 0)
1844 total_size
+= STACK_POINTER_OFFSET
;
1848 if (cfun
->calls_alloca
)
1849 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1851 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1854 if (saved_regs_size
> 0)
1856 offset
= -crtl
->args
.pretend_args_size
;
1857 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1858 if (need_to_save_reg (regno
, 1))
1861 frame_emit_load (regno
, sp_reg
, offset
);
1867 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1871 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
1872 emit_jump_insn (gen__return ());
1877 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1881 /* This is inefficient because it ends up copying to a save-register
1882 which then gets saved even though $lr has already been saved. But
1883 it does generate better code for leaf functions and we don't need
1884 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1885 used for __builtin_return_address anyway, so maybe we don't care if
1886 it's inefficient. */
1887 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
1891 /* Given VAL, generate a constant appropriate for MODE.
1892 If MODE is a vector mode, every element will be VAL.
1893 For TImode, VAL will be zero extended to 128 bits. */
1895 spu_const (machine_mode mode
, HOST_WIDE_INT val
)
1901 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
1902 || GET_MODE_CLASS (mode
) == MODE_FLOAT
1903 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1904 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1906 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1907 return immed_double_const (val
, 0, mode
);
1909 /* val is the bit representation of the float */
1910 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1911 return hwint_to_const_double (mode
, val
);
1913 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
1914 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
1916 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
1918 units
= GET_MODE_NUNITS (mode
);
1920 v
= rtvec_alloc (units
);
1922 for (i
= 0; i
< units
; ++i
)
1923 RTVEC_ELT (v
, i
) = inner
;
1925 return gen_rtx_CONST_VECTOR (mode
, v
);
1928 /* Create a MODE vector constant from 4 ints. */
1930 spu_const_from_ints(machine_mode mode
, int a
, int b
, int c
, int d
)
1932 unsigned char arr
[16];
1933 arr
[0] = (a
>> 24) & 0xff;
1934 arr
[1] = (a
>> 16) & 0xff;
1935 arr
[2] = (a
>> 8) & 0xff;
1936 arr
[3] = (a
>> 0) & 0xff;
1937 arr
[4] = (b
>> 24) & 0xff;
1938 arr
[5] = (b
>> 16) & 0xff;
1939 arr
[6] = (b
>> 8) & 0xff;
1940 arr
[7] = (b
>> 0) & 0xff;
1941 arr
[8] = (c
>> 24) & 0xff;
1942 arr
[9] = (c
>> 16) & 0xff;
1943 arr
[10] = (c
>> 8) & 0xff;
1944 arr
[11] = (c
>> 0) & 0xff;
1945 arr
[12] = (d
>> 24) & 0xff;
1946 arr
[13] = (d
>> 16) & 0xff;
1947 arr
[14] = (d
>> 8) & 0xff;
1948 arr
[15] = (d
>> 0) & 0xff;
1949 return array_to_constant(mode
, arr
);
1952 /* branch hint stuff */
1954 /* An array of these is used to propagate hints to predecessor blocks. */
1957 rtx_insn
*prop_jump
; /* propagated from another block */
1958 int bb_index
; /* the original block. */
1960 static struct spu_bb_info
*spu_bb_info
;
1962 #define STOP_HINT_P(INSN) \
1964 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1965 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1967 /* 1 when RTX is a hinted branch or its target. We keep track of
1968 what has been hinted so the safe-hint code can test it easily. */
1969 #define HINTED_P(RTX) \
1970 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
1972 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
1973 #define SCHED_ON_EVEN_P(RTX) \
1974 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
1976 /* Emit a nop for INSN such that the two will dual issue. This assumes
1977 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1978 We check for TImode to handle a MULTI1 insn which has dual issued its
1979 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
1981 emit_nop_for_insn (rtx_insn
*insn
)
1986 /* We need to handle JUMP_TABLE_DATA separately. */
1987 if (JUMP_TABLE_DATA_P (insn
))
1989 new_insn
= emit_insn_after (gen_lnop(), insn
);
1990 recog_memoized (new_insn
);
1991 INSN_LOCATION (new_insn
) = UNKNOWN_LOCATION
;
1995 p
= get_pipe (insn
);
1996 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
1997 new_insn
= emit_insn_after (gen_lnop (), insn
);
1998 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2000 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2001 PUT_MODE (new_insn
, TImode
);
2002 PUT_MODE (insn
, VOIDmode
);
2005 new_insn
= emit_insn_after (gen_lnop (), insn
);
2006 recog_memoized (new_insn
);
2007 INSN_LOCATION (new_insn
) = INSN_LOCATION (insn
);
2010 /* Insert nops in basic blocks to meet dual issue alignment
2011 requirements. Also make sure hbrp and hint instructions are at least
2012 one cycle apart, possibly inserting a nop. */
2016 rtx_insn
*insn
, *next_insn
, *prev_insn
, *hbr_insn
= 0;
2020 /* This sets up INSN_ADDRESSES. */
2021 shorten_branches (get_insns ());
2023 /* Keep track of length added by nops. */
2027 insn
= get_insns ();
2028 if (!active_insn_p (insn
))
2029 insn
= next_active_insn (insn
);
2030 for (; insn
; insn
= next_insn
)
2032 next_insn
= next_active_insn (insn
);
2033 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2034 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2038 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2039 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2040 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2043 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2044 PUT_MODE (prev_insn
, GET_MODE (insn
));
2045 PUT_MODE (insn
, TImode
);
2046 INSN_LOCATION (prev_insn
) = INSN_LOCATION (insn
);
2052 if (INSN_CODE (insn
) == CODE_FOR_blockage
&& next_insn
)
2054 if (GET_MODE (insn
) == TImode
)
2055 PUT_MODE (next_insn
, TImode
);
2057 next_insn
= next_active_insn (insn
);
2059 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2060 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2062 if (((addr
+ length
) & 7) != 0)
2064 emit_nop_for_insn (prev_insn
);
2068 else if (GET_MODE (insn
) == TImode
2069 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2070 || get_attr_type (insn
) == TYPE_MULTI0
)
2071 && ((addr
+ length
) & 7) != 0)
2073 /* prev_insn will always be set because the first insn is
2074 always 8-byte aligned. */
2075 emit_nop_for_insn (prev_insn
);
2083 /* Routines for branch hints. */
2086 spu_emit_branch_hint (rtx_insn
*before
, rtx_insn
*branch
, rtx target
,
2087 int distance
, sbitmap blocks
)
2091 rtx_jump_table_data
*table
;
2093 if (before
== 0 || branch
== 0 || target
== 0)
2096 /* While scheduling we require hints to be no further than 600, so
2097 we need to enforce that here too */
2101 /* If we have a Basic block note, emit it after the basic block note. */
2102 if (NOTE_INSN_BASIC_BLOCK_P (before
))
2103 before
= NEXT_INSN (before
);
2105 rtx_code_label
*branch_label
= gen_label_rtx ();
2106 LABEL_NUSES (branch_label
)++;
2107 LABEL_PRESERVE_P (branch_label
) = 1;
2108 insn
= emit_label_before (branch_label
, branch
);
2109 rtx branch_label_ref
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2110 bitmap_set_bit (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2112 hint
= emit_insn_before (gen_hbr (branch_label_ref
, target
), before
);
2113 recog_memoized (hint
);
2114 INSN_LOCATION (hint
) = INSN_LOCATION (branch
);
2115 HINTED_P (branch
) = 1;
2117 if (GET_CODE (target
) == LABEL_REF
)
2118 HINTED_P (XEXP (target
, 0)) = 1;
2119 else if (tablejump_p (branch
, 0, &table
))
2123 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2124 vec
= XVEC (PATTERN (table
), 0);
2126 vec
= XVEC (PATTERN (table
), 1);
2127 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2128 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2131 if (distance
>= 588)
2133 /* Make sure the hint isn't scheduled any earlier than this point,
2134 which could make it too far for the branch offest to fit */
2135 insn
= emit_insn_before (gen_blockage (), hint
);
2136 recog_memoized (insn
);
2137 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2139 else if (distance
<= 8 * 4)
2141 /* To guarantee at least 8 insns between the hint and branch we
2144 for (d
= distance
; d
< 8 * 4; d
+= 4)
2147 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2148 recog_memoized (insn
);
2149 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2152 /* Make sure any nops inserted aren't scheduled before the hint. */
2153 insn
= emit_insn_after (gen_blockage (), hint
);
2154 recog_memoized (insn
);
2155 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2157 /* Make sure any nops inserted aren't scheduled after the call. */
2158 if (CALL_P (branch
) && distance
< 8 * 4)
2160 insn
= emit_insn_before (gen_blockage (), branch
);
2161 recog_memoized (insn
);
2162 INSN_LOCATION (insn
) = INSN_LOCATION (branch
);
2167 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2168 the rtx for the branch target. */
2170 get_branch_target (rtx_insn
*branch
)
2172 if (JUMP_P (branch
))
2176 /* Return statements */
2177 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2178 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2181 if (extract_asm_operands (PATTERN (branch
)) != NULL
)
2184 set
= single_set (branch
);
2185 src
= SET_SRC (set
);
2186 if (GET_CODE (SET_DEST (set
)) != PC
)
2189 if (GET_CODE (src
) == IF_THEN_ELSE
)
2192 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2195 /* If the more probable case is not a fall through, then
2196 try a branch hint. */
2197 int prob
= profile_probability::from_reg_br_prob_note
2198 (XINT (note
, 0)).to_reg_br_prob_base ();
2199 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2200 && GET_CODE (XEXP (src
, 1)) != PC
)
2201 lab
= XEXP (src
, 1);
2202 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2203 && GET_CODE (XEXP (src
, 2)) != PC
)
2204 lab
= XEXP (src
, 2);
2208 if (GET_CODE (lab
) == RETURN
)
2209 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2217 else if (CALL_P (branch
))
2220 /* All of our call patterns are in a PARALLEL and the CALL is
2221 the first pattern in the PARALLEL. */
2222 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2224 call
= XVECEXP (PATTERN (branch
), 0, 0);
2225 if (GET_CODE (call
) == SET
)
2226 call
= SET_SRC (call
);
2227 if (GET_CODE (call
) != CALL
)
2229 return XEXP (XEXP (call
, 0), 0);
2234 /* The special $hbr register is used to prevent the insn scheduler from
2235 moving hbr insns across instructions which invalidate them. It
2236 should only be used in a clobber, and this function searches for
2237 insns which clobber it. */
2239 insn_clobbers_hbr (rtx_insn
*insn
)
2242 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2244 rtx parallel
= PATTERN (insn
);
2247 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2249 clobber
= XVECEXP (parallel
, 0, j
);
2250 if (GET_CODE (clobber
) == CLOBBER
2251 && GET_CODE (XEXP (clobber
, 0)) == REG
2252 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2259 /* Search up to 32 insns starting at FIRST:
2260 - at any kind of hinted branch, just return
2261 - at any unconditional branch in the first 15 insns, just return
2262 - at a call or indirect branch, after the first 15 insns, force it to
2263 an even address and return
2264 - at any unconditional branch, after the first 15 insns, force it to
2266 At then end of the search, insert an hbrp within 4 insns of FIRST,
2267 and an hbrp within 16 instructions of FIRST.
2270 insert_hbrp_for_ilb_runout (rtx_insn
*first
)
2272 rtx_insn
*insn
, *before_4
= 0, *before_16
= 0;
2273 int addr
= 0, length
, first_addr
= -1;
2274 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2275 int insert_lnop_after
= 0;
2276 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2279 if (first_addr
== -1)
2280 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2281 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2282 length
= get_attr_length (insn
);
2284 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2286 /* We test for 14 instructions because the first hbrp will add
2287 up to 2 instructions. */
2288 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2291 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2293 /* Make sure an hbrp is at least 2 cycles away from a hint.
2294 Insert an lnop after the hbrp when necessary. */
2295 if (before_4
== 0 && addr
> 0)
2298 insert_lnop_after
|= 1;
2300 else if (before_4
&& addr
<= 4 * 4)
2301 insert_lnop_after
|= 1;
2302 if (before_16
== 0 && addr
> 10 * 4)
2305 insert_lnop_after
|= 2;
2307 else if (before_16
&& addr
<= 14 * 4)
2308 insert_lnop_after
|= 2;
2311 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2313 if (addr
< hbrp_addr0
)
2315 else if (addr
< hbrp_addr1
)
2319 if (CALL_P (insn
) || JUMP_P (insn
))
2321 if (HINTED_P (insn
))
2324 /* Any branch after the first 15 insns should be on an even
2325 address to avoid a special case branch. There might be
2326 some nops and/or hbrps inserted, so we test after 10
2329 SCHED_ON_EVEN_P (insn
) = 1;
2332 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2336 if (addr
+ length
>= 32 * 4)
2338 gcc_assert (before_4
&& before_16
);
2339 if (hbrp_addr0
> 4 * 4)
2342 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2343 recog_memoized (insn
);
2344 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2345 INSN_ADDRESSES_NEW (insn
,
2346 INSN_ADDRESSES (INSN_UID (before_4
)));
2347 PUT_MODE (insn
, GET_MODE (before_4
));
2348 PUT_MODE (before_4
, TImode
);
2349 if (insert_lnop_after
& 1)
2351 insn
= emit_insn_before (gen_lnop (), before_4
);
2352 recog_memoized (insn
);
2353 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2354 INSN_ADDRESSES_NEW (insn
,
2355 INSN_ADDRESSES (INSN_UID (before_4
)));
2356 PUT_MODE (insn
, TImode
);
2359 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2360 && hbrp_addr1
> 16 * 4)
2363 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2364 recog_memoized (insn
);
2365 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2366 INSN_ADDRESSES_NEW (insn
,
2367 INSN_ADDRESSES (INSN_UID (before_16
)));
2368 PUT_MODE (insn
, GET_MODE (before_16
));
2369 PUT_MODE (before_16
, TImode
);
2370 if (insert_lnop_after
& 2)
2372 insn
= emit_insn_before (gen_lnop (), before_16
);
2373 recog_memoized (insn
);
2374 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2375 INSN_ADDRESSES_NEW (insn
,
2376 INSN_ADDRESSES (INSN_UID
2378 PUT_MODE (insn
, TImode
);
2384 else if (BARRIER_P (insn
))
2389 /* The SPU might hang when it executes 48 inline instructions after a
2390 hinted branch jumps to its hinted target. The beginning of a
2391 function and the return from a call might have been hinted, and
2392 must be handled as well. To prevent a hang we insert 2 hbrps. The
2393 first should be within 6 insns of the branch target. The second
2394 should be within 22 insns of the branch target. When determining
2395 if hbrps are necessary, we look for only 32 inline instructions,
2396 because up to 12 nops and 4 hbrps could be inserted. Similarily,
2397 when inserting new hbrps, we insert them within 4 and 16 insns of
2403 if (TARGET_SAFE_HINTS
)
2405 shorten_branches (get_insns ());
2406 /* Insert hbrp at beginning of function */
2407 insn
= next_active_insn (get_insns ());
2409 insert_hbrp_for_ilb_runout (insn
);
2410 /* Insert hbrp after hinted targets. */
2411 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2412 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2413 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2417 static int in_spu_reorg
;
2420 spu_var_tracking (void)
2422 if (flag_var_tracking
)
2425 timevar_push (TV_VAR_TRACKING
);
2426 variable_tracking_main ();
2427 timevar_pop (TV_VAR_TRACKING
);
2428 df_finish_pass (false);
2432 /* Insert branch hints. There are no branch optimizations after this
2433 pass, so it's safe to set our branch hints now. */
2435 spu_machine_dependent_reorg (void)
2439 rtx_insn
*branch
, *insn
;
2440 rtx branch_target
= 0;
2441 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2445 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2447 /* We still do it for unoptimized code because an external
2448 function might have hinted a call or return. */
2449 compute_bb_for_insn ();
2452 spu_var_tracking ();
2453 free_bb_for_insn ();
2457 blocks
= sbitmap_alloc (last_basic_block_for_fn (cfun
));
2458 bitmap_clear (blocks
);
2461 compute_bb_for_insn ();
2463 /* (Re-)discover loops so that bb->loop_father can be used
2464 in the analysis below. */
2465 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
2470 (struct spu_bb_info
*) xcalloc (n_basic_blocks_for_fn (cfun
),
2471 sizeof (struct spu_bb_info
));
2473 /* We need exact insn addresses and lengths. */
2474 shorten_branches (get_insns ());
2476 for (i
= n_basic_blocks_for_fn (cfun
) - 1; i
>= 0; i
--)
2478 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2480 if (spu_bb_info
[i
].prop_jump
)
2482 branch
= spu_bb_info
[i
].prop_jump
;
2483 branch_target
= get_branch_target (branch
);
2484 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2485 required_dist
= spu_hint_dist
;
2487 /* Search from end of a block to beginning. In this loop, find
2488 jumps which need a branch and emit them only when:
2489 - it's an indirect branch and we're at the insn which sets
2491 - we're at an insn that will invalidate the hint. e.g., a
2492 call, another hint insn, inline asm that clobbers $hbr, and
2493 some inlined operations (divmodsi4). Don't consider jumps
2494 because they are only at the end of a block and are
2495 considered when we are deciding whether to propagate
2496 - we're getting too far away from the branch. The hbr insns
2497 only have a signed 10 bit offset
2498 We go back as far as possible so the branch will be considered
2499 for propagation when we get to the beginning of the block. */
2500 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2504 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2506 && ((GET_CODE (branch_target
) == REG
2507 && set_of (branch_target
, insn
) != NULL_RTX
)
2508 || insn_clobbers_hbr (insn
)
2509 || branch_addr
- insn_addr
> 600))
2511 rtx_insn
*next
= NEXT_INSN (insn
);
2512 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2513 if (insn
!= BB_END (bb
)
2514 && branch_addr
- next_addr
>= required_dist
)
2518 "hint for %i in block %i before %i\n",
2519 INSN_UID (branch
), bb
->index
,
2521 spu_emit_branch_hint (next
, branch
, branch_target
,
2522 branch_addr
- next_addr
, blocks
);
2527 /* JUMP_P will only be true at the end of a block. When
2528 branch is already set it means we've previously decided
2529 to propagate a hint for that branch into this block. */
2530 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2533 if ((branch_target
= get_branch_target (insn
)))
2536 branch_addr
= insn_addr
;
2537 required_dist
= spu_hint_dist
;
2541 if (insn
== BB_HEAD (bb
))
2547 /* If we haven't emitted a hint for this branch yet, it might
2548 be profitable to emit it in one of the predecessor blocks,
2549 especially for loops. */
2551 basic_block prev
= 0, prop
= 0, prev2
= 0;
2552 int loop_exit
= 0, simple_loop
= 0;
2553 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2555 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2556 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2557 prev
= EDGE_PRED (bb
, j
)->src
;
2559 prev2
= EDGE_PRED (bb
, j
)->src
;
2561 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2562 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2564 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2567 /* If this branch is a loop exit then propagate to previous
2568 fallthru block. This catches the cases when it is a simple
2569 loop or when there is an initial branch into the loop. */
2570 if (prev
&& (loop_exit
|| simple_loop
)
2571 && bb_loop_depth (prev
) <= bb_loop_depth (bb
))
2574 /* If there is only one adjacent predecessor. Don't propagate
2575 outside this loop. */
2576 else if (prev
&& single_pred_p (bb
)
2577 && prev
->loop_father
== bb
->loop_father
)
2580 /* If this is the JOIN block of a simple IF-THEN then
2581 propagate the hint to the HEADER block. */
2582 else if (prev
&& prev2
2583 && EDGE_COUNT (bb
->preds
) == 2
2584 && EDGE_COUNT (prev
->preds
) == 1
2585 && EDGE_PRED (prev
, 0)->src
== prev2
2586 && prev2
->loop_father
== bb
->loop_father
2587 && GET_CODE (branch_target
) != REG
)
2590 /* Don't propagate when:
2591 - this is a simple loop and the hint would be too far
2592 - this is not a simple loop and there are 16 insns in
2594 - the predecessor block ends in a branch that will be
2596 - the predecessor block ends in an insn that invalidates
2600 && (bbend
= BB_END (prop
))
2601 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2602 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2603 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2606 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2607 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2608 bb
->index
, prop
->index
, bb_loop_depth (bb
),
2609 INSN_UID (branch
), loop_exit
, simple_loop
,
2610 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2612 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2613 spu_bb_info
[prop
->index
].bb_index
= i
;
2615 else if (branch_addr
- next_addr
>= required_dist
)
2618 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2619 INSN_UID (branch
), bb
->index
,
2620 INSN_UID (NEXT_INSN (insn
)));
2621 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2622 branch_addr
- next_addr
, blocks
);
2629 if (!bitmap_empty_p (blocks
))
2630 find_many_sub_basic_blocks (blocks
);
2632 /* We have to schedule to make sure alignment is ok. */
2633 FOR_EACH_BB_FN (bb
, cfun
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2635 /* The hints need to be scheduled, so call it again. */
2637 df_finish_pass (true);
2643 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2644 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2646 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2647 between its branch label and the branch . We don't move the
2648 label because GCC expects it at the beginning of the block. */
2649 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2650 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2651 rtx_insn
*label
= as_a
<rtx_insn
*> (XEXP (label_ref
, 0));
2654 for (branch
= NEXT_INSN (label
);
2655 !JUMP_P (branch
) && !CALL_P (branch
);
2656 branch
= NEXT_INSN (branch
))
2657 if (NONJUMP_INSN_P (branch
))
2658 offset
+= get_attr_length (branch
);
2660 XVECEXP (unspec
, 0, 0) = plus_constant (Pmode
, label_ref
, offset
);
2663 spu_var_tracking ();
2665 loop_optimizer_finalize ();
2667 free_bb_for_insn ();
/* Insn scheduling routines, primarily for dual issue.  */

/* TARGET_SCHED_ISSUE_RATE: the SPU can dual-issue, so the scheduler
   may issue up to two insns per cycle.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}
2681 uses_ls_unit(rtx_insn
*insn
)
2683 rtx set
= single_set (insn
);
2685 && (GET_CODE (SET_DEST (set
)) == MEM
2686 || GET_CODE (SET_SRC (set
)) == MEM
))
2692 get_pipe (rtx_insn
*insn
)
2695 /* Handle inline asm */
2696 if (INSN_CODE (insn
) == -1)
2698 t
= get_attr_type (insn
);
2723 case TYPE_IPREFETCH
:
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates it's clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock;
static int pipe1_clock;

/* Cycle of the most recently issued insn, used to detect dual issue.  */
static int prev_clock_var;

/* Priority of the most recently issued insn.  */
static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first;
static int prev_ls_clock;
2760 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2761 int max_ready ATTRIBUTE_UNUSED
)
2763 spu_sched_length
= 0;
2767 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2768 int max_ready ATTRIBUTE_UNUSED
)
2770 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2772 /* When any block might be at least 8-byte aligned, assume they
2773 will all be at least 8-byte aligned to make sure dual issue
2774 works out correctly. */
2775 spu_sched_length
= 0;
2777 spu_ls_first
= INT_MAX
;
2782 prev_clock_var
= -1;
2787 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2788 int verbose ATTRIBUTE_UNUSED
,
2789 rtx_insn
*insn
, int more
)
2793 if (GET_CODE (PATTERN (insn
)) == USE
2794 || GET_CODE (PATTERN (insn
)) == CLOBBER
2795 || (len
= get_attr_length (insn
)) == 0)
2798 spu_sched_length
+= len
;
2800 /* Reset on inline asm */
2801 if (INSN_CODE (insn
) == -1)
2803 spu_ls_first
= INT_MAX
;
2808 p
= get_pipe (insn
);
2810 pipe0_clock
= clock_var
;
2812 pipe1_clock
= clock_var
;
2816 if (clock_var
- prev_ls_clock
> 1
2817 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2818 spu_ls_first
= INT_MAX
;
2819 if (uses_ls_unit (insn
))
2821 if (spu_ls_first
== INT_MAX
)
2822 spu_ls_first
= spu_sched_length
;
2823 prev_ls_clock
= clock_var
;
2826 /* The scheduler hasn't inserted the nop, but we will later on.
2827 Include those nops in spu_sched_length. */
2828 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2829 spu_sched_length
+= 4;
2830 prev_clock_var
= clock_var
;
2832 /* more is -1 when called from spu_sched_reorder for new insns
2833 that don't have INSN_PRIORITY */
2835 prev_priority
= INSN_PRIORITY (insn
);
2838 /* Always try issuing more insns. spu_sched_reorder will decide
2839 when the cycle should be advanced. */
2843 /* This function is called for both TARGET_SCHED_REORDER and
2844 TARGET_SCHED_REORDER2. */
2846 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2847 rtx_insn
**ready
, int *nreadyp
, int clock
)
2849 int i
, nready
= *nreadyp
;
2850 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
2855 if (nready
<= 0 || pipe1_clock
>= clock
)
2858 /* Find any rtl insns that don't generate assembly insns and schedule
2860 for (i
= nready
- 1; i
>= 0; i
--)
2863 if (INSN_CODE (insn
) == -1
2864 || INSN_CODE (insn
) == CODE_FOR_blockage
2865 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
2867 ready
[i
] = ready
[nready
- 1];
2868 ready
[nready
- 1] = insn
;
2873 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
2874 for (i
= 0; i
< nready
; i
++)
2875 if (INSN_CODE (ready
[i
]) != -1)
2878 switch (get_attr_type (insn
))
2904 case TYPE_IPREFETCH
:
2910 /* In the first scheduling phase, schedule loads and stores together
2911 to increase the chance they will get merged during postreload CSE. */
2912 if (!reload_completed
&& pipe_ls
>= 0)
2914 insn
= ready
[pipe_ls
];
2915 ready
[pipe_ls
] = ready
[nready
- 1];
2916 ready
[nready
- 1] = insn
;
2920 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2924 /* When we have loads/stores in every cycle of the last 15 insns and
2925 we are about to schedule another load/store, emit an hbrp insn
2928 && spu_sched_length
- spu_ls_first
>= 4 * 15
2929 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
2931 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2932 recog_memoized (insn
);
2933 if (pipe0_clock
< clock
)
2934 PUT_MODE (insn
, TImode
);
2935 spu_sched_variable_issue (file
, verbose
, insn
, -1);
2939 /* In general, we want to emit nops to increase dual issue, but dual
2940 issue isn't faster when one of the insns could be scheduled later
2941 without effecting the critical path. We look at INSN_PRIORITY to
2942 make a good guess, but it isn't perfect so -mdual-nops=n can be
2943 used to effect it. */
2944 if (in_spu_reorg
&& spu_dual_nops
< 10)
2946 /* When we are at an even address and we are not issuing nops to
2947 improve scheduling then we need to advance the cycle. */
2948 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
2949 && (spu_dual_nops
== 0
2952 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
2955 /* When at an odd address, schedule the highest priority insn
2956 without considering pipeline. */
2957 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
2958 && (spu_dual_nops
== 0
2960 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
2965 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2966 pipe0 insn in the ready list, schedule it. */
2967 if (pipe0_clock
< clock
&& pipe_0
>= 0)
2968 schedule_i
= pipe_0
;
2970 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2971 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2973 schedule_i
= pipe_1
;
2975 if (schedule_i
> -1)
2977 insn
= ready
[schedule_i
];
2978 ready
[schedule_i
] = ready
[nready
- 1];
2979 ready
[nready
- 1] = insn
;
2985 /* INSN is dependent on DEP_INSN. */
2987 spu_sched_adjust_cost (rtx_insn
*insn
, int dep_type
, rtx_insn
*dep_insn
,
2988 int cost
, unsigned int)
2992 /* The blockage pattern is used to prevent instructions from being
2993 moved across it and has no cost. */
2994 if (INSN_CODE (insn
) == CODE_FOR_blockage
2995 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
2998 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
2999 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3002 /* Make sure hbrps are spread out. */
3003 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3004 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3007 /* Make sure hints and hbrps are 2 cycles apart. */
3008 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3009 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3010 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3011 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3014 /* An hbrp has no real dependency on other insns. */
3015 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3016 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3019 /* Assuming that it is unlikely an argument register will be used in
3020 the first cycle of the called function, we reduce the cost for
3021 slightly better scheduling of dep_insn. When not hinted, the
3022 mispredicted branch would hide the cost as well. */
3025 rtx target
= get_branch_target (insn
);
3026 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3031 /* And when returning from a function, let's assume the return values
3032 are completed sooner too. */
3033 if (CALL_P (dep_insn
))
3036 /* Make sure an instruction that loads from the back chain is schedule
3037 away from the return instruction so a hint is more likely to get
3039 if (INSN_CODE (insn
) == CODE_FOR__return
3040 && (set
= single_set (dep_insn
))
3041 && GET_CODE (SET_DEST (set
)) == REG
3042 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3045 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3046 scheduler makes every insn in a block anti-dependent on the final
3047 jump_insn. We adjust here so higher cost insns will get scheduled
3049 if (JUMP_P (insn
) && dep_type
== REG_DEP_ANTI
)
3050 return insn_cost (dep_insn
) - 3;
3055 /* Create a CONST_DOUBLE from a string. */
3057 spu_float_const (const char *string
, machine_mode mode
)
3059 REAL_VALUE_TYPE value
;
3060 value
= REAL_VALUE_ATOF (string
, mode
);
3061 return const_double_from_real_value (value
, mode
);
3065 spu_constant_address_p (rtx x
)
3067 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
3068 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
3069 || GET_CODE (x
) == HIGH
);
3072 static enum spu_immediate
3073 which_immediate_load (HOST_WIDE_INT val
)
3075 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3077 if (val
>= -0x8000 && val
<= 0x7fff)
3079 if (val
>= 0 && val
<= 0x3ffff)
3081 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3083 if ((val
& 0xffff) == 0)
3089 /* Return true when OP can be loaded by one of the il instructions, or
3090 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3092 immediate_load_p (rtx op
, machine_mode mode
)
3094 if (CONSTANT_P (op
))
3096 enum immediate_class c
= classify_immediate (op
, mode
);
3097 return c
== IC_IL1
|| c
== IC_IL1s
3098 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
3103 /* Return true if the first SIZE bytes of arr is a constant that can be
3104 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3105 represent the size and offset of the instruction to use. */
3107 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3109 int cpat
, run
, i
, start
;
3113 for (i
= 0; i
< size
&& cpat
; i
++)
3121 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3123 else if (arr
[i
] == 0)
3125 while (arr
[i
+run
] == run
&& i
+run
< 16)
3127 if (run
!= 4 && run
!= 8)
3132 if ((i
& (run
-1)) != 0)
3139 if (cpat
&& (run
|| size
< 16))
3146 *pstart
= start
== -1 ? 16-run
: start
;
3152 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3153 it into a register. MODE is only valid when OP is a CONST_INT. */
3154 static enum immediate_class
3155 classify_immediate (rtx op
, machine_mode mode
)
3158 unsigned char arr
[16];
3159 int i
, j
, repeated
, fsmbi
, repeat
;
3161 gcc_assert (CONSTANT_P (op
));
3163 if (GET_MODE (op
) != VOIDmode
)
3164 mode
= GET_MODE (op
);
3166 /* A V4SI const_vector with all identical symbols is ok. */
3169 && GET_CODE (op
) == CONST_VECTOR
3170 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3171 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
)
3172 op
= unwrap_const_vec_duplicate (op
);
3174 switch (GET_CODE (op
))
3178 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3181 /* We can never know if the resulting address fits in 18 bits and can be
3182 loaded with ila. For now, assume the address will not overflow if
3183 the displacement is "small" (fits 'K' constraint). */
3184 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3186 rtx sym
= XEXP (XEXP (op
, 0), 0);
3187 rtx cst
= XEXP (XEXP (op
, 0), 1);
3189 if (GET_CODE (sym
) == SYMBOL_REF
3190 && GET_CODE (cst
) == CONST_INT
3191 && satisfies_constraint_K (cst
))
3200 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3201 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3202 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3208 constant_to_array (mode
, op
, arr
);
3210 /* Check that each 4-byte slot is identical. */
3212 for (i
= 4; i
< 16; i
+= 4)
3213 for (j
= 0; j
< 4; j
++)
3214 if (arr
[j
] != arr
[i
+ j
])
3219 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3220 val
= trunc_int_for_mode (val
, SImode
);
3222 if (which_immediate_load (val
) != SPU_NONE
)
3226 /* Any mode of 2 bytes or smaller can be loaded with an il
3228 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3232 for (i
= 0; i
< 16 && fsmbi
; i
++)
3233 if (arr
[i
] != 0 && repeat
== 0)
3235 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3238 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3240 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
3253 static enum spu_immediate
3254 which_logical_immediate (HOST_WIDE_INT val
)
3256 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3258 if (val
>= -0x200 && val
<= 0x1ff)
3260 if (val
>= 0 && val
<= 0xffff)
3262 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3264 val
= trunc_int_for_mode (val
, HImode
);
3265 if (val
>= -0x200 && val
<= 0x1ff)
3267 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3269 val
= trunc_int_for_mode (val
, QImode
);
3270 if (val
>= -0x200 && val
<= 0x1ff)
3277 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3280 const_vector_immediate_p (rtx x
)
3283 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
3284 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
3285 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
3286 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
3292 logical_immediate_p (rtx op
, machine_mode mode
)
3295 unsigned char arr
[16];
3298 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3299 || GET_CODE (op
) == CONST_VECTOR
);
3301 if (GET_CODE (op
) == CONST_VECTOR
3302 && !const_vector_immediate_p (op
))
3305 if (GET_MODE (op
) != VOIDmode
)
3306 mode
= GET_MODE (op
);
3308 constant_to_array (mode
, op
, arr
);
3310 /* Check that bytes are repeated. */
3311 for (i
= 4; i
< 16; i
+= 4)
3312 for (j
= 0; j
< 4; j
++)
3313 if (arr
[j
] != arr
[i
+ j
])
3316 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3317 val
= trunc_int_for_mode (val
, SImode
);
3319 i
= which_logical_immediate (val
);
3320 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
3324 iohl_immediate_p (rtx op
, machine_mode mode
)
3327 unsigned char arr
[16];
3330 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3331 || GET_CODE (op
) == CONST_VECTOR
);
3333 if (GET_CODE (op
) == CONST_VECTOR
3334 && !const_vector_immediate_p (op
))
3337 if (GET_MODE (op
) != VOIDmode
)
3338 mode
= GET_MODE (op
);
3340 constant_to_array (mode
, op
, arr
);
3342 /* Check that bytes are repeated. */
3343 for (i
= 4; i
< 16; i
+= 4)
3344 for (j
= 0; j
< 4; j
++)
3345 if (arr
[j
] != arr
[i
+ j
])
3348 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3349 val
= trunc_int_for_mode (val
, SImode
);
3351 return val
>= 0 && val
<= 0xffff;
3355 arith_immediate_p (rtx op
, machine_mode mode
,
3356 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3359 unsigned char arr
[16];
3362 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3363 || GET_CODE (op
) == CONST_VECTOR
);
3365 if (GET_CODE (op
) == CONST_VECTOR
3366 && !const_vector_immediate_p (op
))
3369 if (GET_MODE (op
) != VOIDmode
)
3370 mode
= GET_MODE (op
);
3372 constant_to_array (mode
, op
, arr
);
3374 bytes
= GET_MODE_UNIT_SIZE (mode
);
3375 mode
= mode_for_size (GET_MODE_UNIT_BITSIZE (mode
), MODE_INT
, 0);
3377 /* Check that bytes are repeated. */
3378 for (i
= bytes
; i
< 16; i
+= bytes
)
3379 for (j
= 0; j
< bytes
; j
++)
3380 if (arr
[j
] != arr
[i
+ j
])
3384 for (j
= 1; j
< bytes
; j
++)
3385 val
= (val
<< 8) | arr
[j
];
3387 val
= trunc_int_for_mode (val
, mode
);
3389 return val
>= low
&& val
<= high
;
3392 /* TRUE when op is an immediate and an exact power of 2, and given that
3393 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3394 all entries must be the same. */
3396 exp2_immediate_p (rtx op
, machine_mode mode
, int low
, int high
)
3398 machine_mode int_mode
;
3400 unsigned char arr
[16];
3403 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3404 || GET_CODE (op
) == CONST_VECTOR
);
3406 if (GET_CODE (op
) == CONST_VECTOR
3407 && !const_vector_immediate_p (op
))
3410 if (GET_MODE (op
) != VOIDmode
)
3411 mode
= GET_MODE (op
);
3413 constant_to_array (mode
, op
, arr
);
3415 mode
= GET_MODE_INNER (mode
);
3417 bytes
= GET_MODE_SIZE (mode
);
3418 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3420 /* Check that bytes are repeated. */
3421 for (i
= bytes
; i
< 16; i
+= bytes
)
3422 for (j
= 0; j
< bytes
; j
++)
3423 if (arr
[j
] != arr
[i
+ j
])
3427 for (j
= 1; j
< bytes
; j
++)
3428 val
= (val
<< 8) | arr
[j
];
3430 val
= trunc_int_for_mode (val
, int_mode
);
3432 /* Currently, we only handle SFmode */
3433 gcc_assert (mode
== SFmode
);
3436 int exp
= (val
>> 23) - 127;
3437 return val
> 0 && (val
& 0x007fffff) == 0
3438 && exp
>= low
&& exp
<= high
;
3443 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3446 ea_symbol_ref_p (const_rtx x
)
3450 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
3452 rtx plus
= XEXP (x
, 0);
3453 rtx op0
= XEXP (plus
, 0);
3454 rtx op1
= XEXP (plus
, 1);
3455 if (GET_CODE (op1
) == CONST_INT
)
3459 return (GET_CODE (x
) == SYMBOL_REF
3460 && (decl
= SYMBOL_REF_DECL (x
)) != 0
3461 && TREE_CODE (decl
) == VAR_DECL
3462 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)));
3466 - any 32-bit constant (SImode, SFmode)
3467 - any constant that can be generated with fsmbi (any mode)
3468 - a 64-bit constant where the high and low bits are identical
3470 - a 128-bit constant where the four 32-bit words match. */
3472 spu_legitimate_constant_p (machine_mode mode
, rtx x
)
3474 subrtx_iterator::array_type array
;
3475 if (GET_CODE (x
) == HIGH
)
3478 /* Reject any __ea qualified reference. These can't appear in
3479 instructions but must be forced to the constant pool. */
3480 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3481 if (ea_symbol_ref_p (*iter
))
3484 /* V4SI with all identical symbols is valid. */
3487 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3488 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3489 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3490 return const_vec_duplicate_p (x
);
3492 if (GET_CODE (x
) == CONST_VECTOR
3493 && !const_vector_immediate_p (x
))
3498 /* Valid address are:
3499 - symbol_ref, label_ref, const
3501 - reg + const_int, where const_int is 16 byte aligned
3502 - reg + reg, alignment doesn't matter
3503 The alignment matters in the reg+const case because lqd and stqd
3504 ignore the 4 least significant bits of the const. We only care about
3505 16 byte modes because the expand phase will change all smaller MEM
3506 references to TImode. */
3508 spu_legitimate_address_p (machine_mode mode
,
3509 rtx x
, bool reg_ok_strict
)
3511 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3513 && GET_CODE (x
) == AND
3514 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3515 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3517 switch (GET_CODE (x
))
3520 return !TARGET_LARGE_MEM
;
3524 /* Keep __ea references until reload so that spu_expand_mov can see them
3526 if (ea_symbol_ref_p (x
))
3527 return !reload_in_progress
&& !reload_completed
;
3528 return !TARGET_LARGE_MEM
;
3531 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3540 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3545 rtx op0
= XEXP (x
, 0);
3546 rtx op1
= XEXP (x
, 1);
3547 if (GET_CODE (op0
) == SUBREG
)
3548 op0
= XEXP (op0
, 0);
3549 if (GET_CODE (op1
) == SUBREG
)
3550 op1
= XEXP (op1
, 0);
3551 if (GET_CODE (op0
) == REG
3552 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3553 && GET_CODE (op1
) == CONST_INT
3554 && ((INTVAL (op1
) >= -0x2000 && INTVAL (op1
) <= 0x1fff)
3555 /* If virtual registers are involved, the displacement will
3556 change later on anyway, so checking would be premature.
3557 Reload will make sure the final displacement after
3558 register elimination is OK. */
3559 || op0
== arg_pointer_rtx
3560 || op0
== frame_pointer_rtx
3561 || op0
== virtual_stack_vars_rtx
)
3562 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3564 if (GET_CODE (op0
) == REG
3565 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3566 && GET_CODE (op1
) == REG
3567 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
3578 /* Like spu_legitimate_address_p, except with named addresses. */
3580 spu_addr_space_legitimate_address_p (machine_mode mode
, rtx x
,
3581 bool reg_ok_strict
, addr_space_t as
)
3583 if (as
== ADDR_SPACE_EA
)
3584 return (REG_P (x
) && (GET_MODE (x
) == EAmode
));
3586 else if (as
!= ADDR_SPACE_GENERIC
)
3589 return spu_legitimate_address_p (mode
, x
, reg_ok_strict
);
3592 /* When the address is reg + const_int, force the const_int into a
3595 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3596 machine_mode mode ATTRIBUTE_UNUSED
)
3599 /* Make sure both operands are registers. */
3600 if (GET_CODE (x
) == PLUS
)
3604 if (ALIGNED_SYMBOL_REF_P (op0
))
3606 op0
= force_reg (Pmode
, op0
);
3607 mark_reg_pointer (op0
, 128);
3609 else if (GET_CODE (op0
) != REG
)
3610 op0
= force_reg (Pmode
, op0
);
3611 if (ALIGNED_SYMBOL_REF_P (op1
))
3613 op1
= force_reg (Pmode
, op1
);
3614 mark_reg_pointer (op1
, 128);
3616 else if (GET_CODE (op1
) != REG
)
3617 op1
= force_reg (Pmode
, op1
);
3618 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3623 /* Like spu_legitimate_address, except with named address support. */
3625 spu_addr_space_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
,
3628 if (as
!= ADDR_SPACE_GENERIC
)
3631 return spu_legitimize_address (x
, oldx
, mode
);
3634 /* Reload reg + const_int for out-of-range displacements. */
3636 spu_legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
3637 int opnum
, int type
)
3639 bool removed_and
= false;
3641 if (GET_CODE (ad
) == AND
3642 && CONST_INT_P (XEXP (ad
, 1))
3643 && INTVAL (XEXP (ad
, 1)) == (HOST_WIDE_INT
) - 16)
3649 if (GET_CODE (ad
) == PLUS
3650 && REG_P (XEXP (ad
, 0))
3651 && CONST_INT_P (XEXP (ad
, 1))
3652 && !(INTVAL (XEXP (ad
, 1)) >= -0x2000
3653 && INTVAL (XEXP (ad
, 1)) <= 0x1fff))
3655 /* Unshare the sum. */
3658 /* Reload the displacement. */
3659 push_reload (XEXP (ad
, 1), NULL_RTX
, &XEXP (ad
, 1), NULL
,
3660 BASE_REG_CLASS
, GET_MODE (ad
), VOIDmode
, 0, 0,
3661 opnum
, (enum reload_type
) type
);
3663 /* Add back AND for alignment if we stripped it. */
3665 ad
= gen_rtx_AND (GET_MODE (ad
), ad
, GEN_INT (-16));
3673 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3674 struct attribute_spec.handler. */
3676 spu_handle_fndecl_attribute (tree
* node
,
3678 tree args ATTRIBUTE_UNUSED
,
3679 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3681 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3683 warning (0, "%qE attribute only applies to functions",
3685 *no_add_attrs
= true;
3691 /* Handle the "vector" attribute. */
3693 spu_handle_vector_attribute (tree
* node
, tree name
,
3694 tree args ATTRIBUTE_UNUSED
,
3695 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3697 tree type
= *node
, result
= NULL_TREE
;
3701 while (POINTER_TYPE_P (type
)
3702 || TREE_CODE (type
) == FUNCTION_TYPE
3703 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3704 type
= TREE_TYPE (type
);
3706 mode
= TYPE_MODE (type
);
3708 unsigned_p
= TYPE_UNSIGNED (type
);
3712 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3715 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3718 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3721 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3724 result
= V4SF_type_node
;
3727 result
= V2DF_type_node
;
3733 /* Propagate qualifiers attached to the element type
3734 onto the vector type. */
3735 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3736 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3738 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3741 warning (0, "%qE attribute ignored", name
);
3743 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
3748 /* Return nonzero if FUNC is a naked function. */
3750 spu_naked_function_p (tree func
)
3754 if (TREE_CODE (func
) != FUNCTION_DECL
)
3757 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3758 return a
!= NULL_TREE
;
3762 spu_initial_elimination_offset (int from
, int to
)
3764 int saved_regs_size
= spu_saved_regs_size ();
3766 if (!crtl
->is_leaf
|| crtl
->outgoing_args_size
3767 || get_frame_size () || saved_regs_size
)
3768 sp_offset
= STACK_POINTER_OFFSET
;
3769 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3770 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
3771 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3772 return get_frame_size ();
3773 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3774 return sp_offset
+ crtl
->outgoing_args_size
3775 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3776 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3777 return get_frame_size () + saved_regs_size
+ sp_offset
;
3783 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3785 machine_mode mode
= TYPE_MODE (type
);
3786 int byte_size
= ((mode
== BLKmode
)
3787 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3789 /* Make sure small structs are left justified in a register. */
3790 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3791 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3796 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3797 int n
= byte_size
/ UNITS_PER_WORD
;
3798 v
= rtvec_alloc (nregs
);
3799 for (i
= 0; i
< n
; i
++)
3801 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3802 gen_rtx_REG (TImode
,
3805 GEN_INT (UNITS_PER_WORD
* i
));
3806 byte_size
-= UNITS_PER_WORD
;
3813 smode
= smallest_int_mode_for_size (byte_size
* BITS_PER_UNIT
);
3815 gen_rtx_EXPR_LIST (VOIDmode
,
3816 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3817 GEN_INT (UNITS_PER_WORD
* n
));
3819 return gen_rtx_PARALLEL (mode
, v
);
3821 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3825 spu_function_arg (cumulative_args_t cum_v
,
3827 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3829 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3832 if (*cum
>= MAX_REGISTER_ARGS
)
3835 byte_size
= ((mode
== BLKmode
)
3836 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3838 /* The ABI does not allow parameters to be passed partially in
3839 reg and partially in stack. */
3840 if ((*cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3843 /* Make sure small structs are left justified in a register. */
3844 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3845 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3851 smode
= smallest_int_mode_for_size (byte_size
* BITS_PER_UNIT
);
3852 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3853 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ *cum
),
3855 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3858 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ *cum
);
3862 spu_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
3863 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3865 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3867 *cum
+= (type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
3870 ? ((int_size_in_bytes (type
) + 15) / 16)
3873 : HARD_REGNO_NREGS (cum
, mode
));
3876 /* Implement TARGET_FUNCTION_ARG_PADDING. */
3878 static pad_direction
3879 spu_function_arg_padding (machine_mode
, const_tree
)
3884 /* Variable sized types are passed by reference. */
3886 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
3887 machine_mode mode ATTRIBUTE_UNUSED
,
3888 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3890 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3896 /* Create and return the va_list datatype.
3898 On SPU, va_list is an array type equivalent to
3900 typedef struct __va_list_tag
3902 void *__args __attribute__((__aligned(16)));
3903 void *__skip __attribute__((__aligned(16)));
3907 where __args points to the arg that will be returned by the next
3908 va_arg(), and __skip points to the previous stack frame such that
3909 when __args == __skip we should advance __args by 32 bytes. */
3911 spu_build_builtin_va_list (void)
3913 tree f_args
, f_skip
, record
, type_decl
;
3916 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3919 build_decl (BUILTINS_LOCATION
,
3920 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3922 f_args
= build_decl (BUILTINS_LOCATION
,
3923 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3924 f_skip
= build_decl (BUILTINS_LOCATION
,
3925 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3927 DECL_FIELD_CONTEXT (f_args
) = record
;
3928 SET_DECL_ALIGN (f_args
, 128);
3929 DECL_USER_ALIGN (f_args
) = 1;
3931 DECL_FIELD_CONTEXT (f_skip
) = record
;
3932 SET_DECL_ALIGN (f_skip
, 128);
3933 DECL_USER_ALIGN (f_skip
) = 1;
3935 TYPE_STUB_DECL (record
) = type_decl
;
3936 TYPE_NAME (record
) = type_decl
;
3937 TYPE_FIELDS (record
) = f_args
;
3938 DECL_CHAIN (f_args
) = f_skip
;
3940 /* We know this is being padded and we want it too. It is an internal
3941 type so hide the warnings from the user. */
3943 warn_padded
= false;
3945 layout_type (record
);
3949 /* The correct type is an array type of one element. */
3950 return build_array_type (record
, build_index_type (size_zero_node
));
3953 /* Implement va_start by filling the va_list structure VALIST.
3954 NEXTARG points to the first anonymous stack argument.
3956 The following global variables are used to initialize
3957 the va_list structure:
3960 the CUMULATIVE_ARGS for this function
3962 crtl->args.arg_offset_rtx:
3963 holds the offset of the first anonymous stack argument
3964 (relative to the virtual arg pointer). */
3967 spu_va_start (tree valist
, rtx nextarg
)
3969 tree f_args
, f_skip
;
3972 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3973 f_skip
= DECL_CHAIN (f_args
);
3975 valist
= build_simple_mem_ref (valist
);
3977 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3979 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3981 /* Find the __args area. */
3982 t
= make_tree (TREE_TYPE (args
), nextarg
);
3983 if (crtl
->args
.pretend_args_size
> 0)
3984 t
= fold_build_pointer_plus_hwi (t
, -STACK_POINTER_OFFSET
);
3985 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
3986 TREE_SIDE_EFFECTS (t
) = 1;
3987 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3989 /* Find the __skip area. */
3990 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
3991 t
= fold_build_pointer_plus_hwi (t
, (crtl
->args
.pretend_args_size
3992 - STACK_POINTER_OFFSET
));
3993 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
3994 TREE_SIDE_EFFECTS (t
) = 1;
3995 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3998 /* Gimplify va_arg by updating the va_list structure
3999 VALIST as required to retrieve an argument of type
4000 TYPE, and returning that argument.
4002 ret = va_arg(VALIST, TYPE);
4004 generates code equivalent to:
4006 paddedsize = (sizeof(TYPE) + 15) & -16;
4007 if (VALIST.__args + paddedsize > VALIST.__skip
4008 && VALIST.__args <= VALIST.__skip)
4009 addr = VALIST.__skip + 32;
4011 addr = VALIST.__args;
4012 VALIST.__args = addr + paddedsize;
4013 ret = *(TYPE *)addr;
4016 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4017 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4019 tree f_args
, f_skip
;
4021 HOST_WIDE_INT size
, rsize
;
4023 bool pass_by_reference_p
;
4025 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4026 f_skip
= DECL_CHAIN (f_args
);
4029 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4031 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4033 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4035 /* if an object is dynamically sized, a pointer to it is passed
4036 instead of the object itself. */
4037 pass_by_reference_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4039 if (pass_by_reference_p
)
4040 type
= build_pointer_type (type
);
4041 size
= int_size_in_bytes (type
);
4042 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4044 /* build conditional expression to calculate addr. The expression
4045 will be gimplified later. */
4046 tmp
= fold_build_pointer_plus_hwi (unshare_expr (args
), rsize
);
4047 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4048 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4049 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4050 unshare_expr (skip
)));
4052 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4053 fold_build_pointer_plus_hwi (unshare_expr (skip
), 32),
4054 unshare_expr (args
));
4056 gimplify_assign (addr
, tmp
, pre_p
);
4058 /* update VALIST.__args */
4059 tmp
= fold_build_pointer_plus_hwi (addr
, rsize
);
4060 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4062 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
4065 if (pass_by_reference_p
)
4066 addr
= build_va_arg_indirect_ref (addr
);
4068 return build_va_arg_indirect_ref (addr
);
4071 /* Save parameter registers starting with the register that corresponds
4072 to the first unnamed parameters. If the first unnamed parameter is
4073 in the stack then save no registers. Set pretend_args_size to the
4074 amount of space needed to save the registers. */
4076 spu_setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
4077 tree type
, int *pretend_size
, int no_rtl
)
4084 int ncum
= *get_cumulative_args (cum
);
4086 /* cum currently points to the last named argument, we want to
4087 start at the next argument. */
4088 spu_function_arg_advance (pack_cumulative_args (&ncum
), mode
, type
, true);
4090 offset
= -STACK_POINTER_OFFSET
;
4091 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4093 tmp
= gen_frame_mem (V4SImode
,
4094 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4096 emit_move_insn (tmp
,
4097 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4100 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
4105 spu_conditional_register_usage (void)
4109 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4110 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4114 /* This is called any time we inspect the alignment of a register for
4117 reg_aligned_for_addr (rtx x
)
4120 REGNO (x
) < FIRST_PSEUDO_REGISTER
? ORIGINAL_REGNO (x
) : REGNO (x
);
4121 return REGNO_POINTER_ALIGN (regno
) >= 128;
4124 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4125 into its SYMBOL_REF_FLAGS. */
4127 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
4129 default_encode_section_info (decl
, rtl
, first
);
4131 /* If a variable has a forced alignment to < 16 bytes, mark it with
4132 SYMBOL_FLAG_ALIGN1. */
4133 if (TREE_CODE (decl
) == VAR_DECL
4134 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4135 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4138 /* Return TRUE if we are certain the mem refers to a complete object
4139 which is both 16-byte aligned and padded to a 16-byte boundary. This
4140 would make it safe to store with a single instruction.
4141 We guarantee the alignment and padding for static objects by aligning
4142 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4143 FIXME: We currently cannot guarantee this for objects on the stack
4144 because assign_parm_setup_stack calls assign_stack_local with the
4145 alignment of the parameter mode and in that case the alignment never
4146 gets adjusted by LOCAL_ALIGNMENT. */
4148 store_with_one_insn_p (rtx mem
)
4150 machine_mode mode
= GET_MODE (mem
);
4151 rtx addr
= XEXP (mem
, 0);
4152 if (mode
== BLKmode
)
4154 if (GET_MODE_SIZE (mode
) >= 16)
4156 /* Only static objects. */
4157 if (GET_CODE (addr
) == SYMBOL_REF
)
4159 /* We use the associated declaration to make sure the access is
4160 referring to the whole object.
4161 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4162 if it is necessary. Will there be cases where one exists, and
4163 the other does not? Will there be cases where both exist, but
4164 have different types? */
4165 tree decl
= MEM_EXPR (mem
);
4167 && TREE_CODE (decl
) == VAR_DECL
4168 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4170 decl
= SYMBOL_REF_DECL (addr
);
4172 && TREE_CODE (decl
) == VAR_DECL
4173 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4179 /* Return 1 when the address is not valid for a simple load and store as
4180 required by the '_mov*' patterns. We could make this less strict
4181 for loads, but we prefer mem's to look the same so they are more
4182 likely to be merged. */
4184 address_needs_split (rtx mem
)
4186 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4187 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4188 || !(store_with_one_insn_p (mem
)
4189 || mem_is_padded_component_ref (mem
))))
4195 static GTY(()) rtx cache_fetch
; /* __cache_fetch function */
4196 static GTY(()) rtx cache_fetch_dirty
; /* __cache_fetch_dirty function */
4197 static alias_set_type ea_alias_set
= -1; /* alias set for __ea memory */
4199 /* MEM is known to be an __ea qualified memory access. Emit a call to
4200 fetch the ppu memory to local store, and return its address in local
4204 ea_load_store (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4208 rtx ndirty
= GEN_INT (GET_MODE_SIZE (GET_MODE (mem
)));
4209 if (!cache_fetch_dirty
)
4210 cache_fetch_dirty
= init_one_libfunc ("__cache_fetch_dirty");
4211 emit_library_call_value (cache_fetch_dirty
, data_addr
, LCT_NORMAL
, Pmode
,
4212 ea_addr
, EAmode
, ndirty
, SImode
);
4217 cache_fetch
= init_one_libfunc ("__cache_fetch");
4218 emit_library_call_value (cache_fetch
, data_addr
, LCT_NORMAL
, Pmode
,
4223 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4224 dirty bit marking, inline.
4226 The cache control data structure is an array of
4228 struct __cache_tag_array
4230 unsigned int tag_lo[4];
4231 unsigned int tag_hi[4];
4232 void *data_pointer[4];
4234 vector unsigned short dirty_bits[4];
4238 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4242 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4243 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4244 rtx index_mask
= gen_reg_rtx (SImode
);
4245 rtx tag_arr
= gen_reg_rtx (Pmode
);
4246 rtx splat_mask
= gen_reg_rtx (TImode
);
4247 rtx splat
= gen_reg_rtx (V4SImode
);
4248 rtx splat_hi
= NULL_RTX
;
4249 rtx tag_index
= gen_reg_rtx (Pmode
);
4250 rtx block_off
= gen_reg_rtx (SImode
);
4251 rtx tag_addr
= gen_reg_rtx (Pmode
);
4252 rtx tag
= gen_reg_rtx (V4SImode
);
4253 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4254 rtx cache_tag_hi
= NULL_RTX
;
4255 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4256 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4257 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4258 rtx tag_equal_hi
= NULL_RTX
;
4259 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4260 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4261 rtx eq_index
= gen_reg_rtx (SImode
);
4262 rtx bcomp
, hit_label
, hit_ref
, cont_label
;
4265 if (spu_ea_model
!= 32)
4267 splat_hi
= gen_reg_rtx (V4SImode
);
4268 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4269 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4272 emit_move_insn (index_mask
, plus_constant (Pmode
, tag_size_sym
, -128));
4273 emit_move_insn (tag_arr
, tag_arr_sym
);
4274 v
= 0x0001020300010203LL
;
4275 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4276 ea_addr_si
= ea_addr
;
4277 if (spu_ea_model
!= 32)
4278 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4280 /* tag_index = ea_addr & (tag_array_size - 128) */
4281 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4283 /* splat ea_addr to all 4 slots. */
4284 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4285 /* Similarly for high 32 bits of ea_addr. */
4286 if (spu_ea_model
!= 32)
4287 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4289 /* block_off = ea_addr & 127 */
4290 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4292 /* tag_addr = tag_arr + tag_index */
4293 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4295 /* Read cache tags. */
4296 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4297 if (spu_ea_model
!= 32)
4298 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4299 plus_constant (Pmode
,
4302 /* tag = ea_addr & -128 */
4303 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4305 /* Read all four cache data pointers. */
4306 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4307 plus_constant (Pmode
,
4311 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4312 if (spu_ea_model
!= 32)
4314 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4315 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4318 /* At most one of the tags compare equal, so tag_equal has one
4319 32-bit slot set to all 1's, with the other slots all zero.
4320 gbb picks off low bit from each byte in the 128-bit registers,
4321 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4323 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4324 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4326 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4327 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4329 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4330 (rotating eq_index mod 16 bytes). */
4331 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4332 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4334 /* Add block offset to form final data address. */
4335 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4337 /* Check that we did hit. */
4338 hit_label
= gen_label_rtx ();
4339 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4340 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4341 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
,
4342 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4344 /* Say that this branch is very likely to happen. */
4345 add_reg_br_prob_note (insn
, profile_probability::very_likely ());
4347 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4348 cont_label
= gen_label_rtx ();
4349 emit_jump_insn (gen_jump (cont_label
));
4352 emit_label (hit_label
);
4357 rtx dirty_bits
= gen_reg_rtx (TImode
);
4358 rtx dirty_off
= gen_reg_rtx (SImode
);
4359 rtx dirty_128
= gen_reg_rtx (TImode
);
4360 rtx neg_block_off
= gen_reg_rtx (SImode
);
4362 /* Set up mask with one dirty bit per byte of the mem we are
4363 writing, starting from top bit. */
4365 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4366 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4371 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4373 /* Form index into cache dirty_bits. eq_index is one of
4374 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4375 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4376 offset to each of the four dirty_bits elements. */
4377 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4379 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4381 /* Rotate bit mask to proper bit. */
4382 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4383 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4384 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4386 /* Or in the new dirty bits. */
4387 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4390 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4393 emit_label (cont_label
);
4397 expand_ea_mem (rtx mem
, bool is_store
)
4400 rtx data_addr
= gen_reg_rtx (Pmode
);
4403 ea_addr
= force_reg (EAmode
, XEXP (mem
, 0));
4404 if (optimize_size
|| optimize
== 0)
4405 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4407 ea_load_store_inline (mem
, is_store
, ea_addr
, data_addr
);
4409 if (ea_alias_set
== -1)
4410 ea_alias_set
= new_alias_set ();
4412 /* We generate a new MEM RTX to refer to the copy of the data
4413 in the cache. We do not copy memory attributes (except the
4414 alignment) from the original MEM, as they may no longer apply
4415 to the cache copy. */
4416 new_mem
= gen_rtx_MEM (GET_MODE (mem
), data_addr
);
4417 set_mem_alias_set (new_mem
, ea_alias_set
);
4418 set_mem_align (new_mem
, MIN (MEM_ALIGN (mem
), 128 * 8));
4424 spu_expand_mov (rtx
* ops
, machine_mode mode
)
4426 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4428 /* Perform the move in the destination SUBREG's inner mode. */
4429 ops
[0] = SUBREG_REG (ops
[0]);
4430 mode
= GET_MODE (ops
[0]);
4431 ops
[1] = gen_lowpart_common (mode
, ops
[1]);
4432 gcc_assert (ops
[1]);
4435 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4437 rtx from
= SUBREG_REG (ops
[1]);
4438 scalar_int_mode imode
= int_mode_for_mode (GET_MODE (from
)).require ();
4440 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4441 && GET_MODE_CLASS (imode
) == MODE_INT
4442 && subreg_lowpart_p (ops
[1]));
4444 if (GET_MODE_SIZE (imode
) < 4)
4446 if (imode
!= GET_MODE (from
))
4447 from
= gen_rtx_SUBREG (imode
, from
, 0);
4449 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4451 enum insn_code icode
= convert_optab_handler (trunc_optab
,
4453 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4456 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4460 /* At least one of the operands needs to be a register. */
4461 if ((reload_in_progress
| reload_completed
) == 0
4462 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4464 rtx temp
= force_reg (mode
, ops
[1]);
4465 emit_move_insn (ops
[0], temp
);
4468 if (reload_in_progress
|| reload_completed
)
4470 if (CONSTANT_P (ops
[1]))
4471 return spu_split_immediate (ops
);
4475 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4477 if (GET_CODE (ops
[1]) == CONST_INT
)
4479 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4480 if (val
!= INTVAL (ops
[1]))
4482 emit_move_insn (ops
[0], GEN_INT (val
));
4488 if (MEM_ADDR_SPACE (ops
[0]))
4489 ops
[0] = expand_ea_mem (ops
[0], true);
4490 return spu_split_store (ops
);
4494 if (MEM_ADDR_SPACE (ops
[1]))
4495 ops
[1] = expand_ea_mem (ops
[1], false);
4496 return spu_split_load (ops
);
4503 spu_convert_move (rtx dst
, rtx src
)
4505 machine_mode mode
= GET_MODE (dst
);
4506 machine_mode int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
4508 gcc_assert (GET_MODE (src
) == TImode
);
4509 reg
= int_mode
!= mode
? gen_reg_rtx (int_mode
) : dst
;
4510 emit_insn (gen_rtx_SET (reg
,
4511 gen_rtx_TRUNCATE (int_mode
,
4512 gen_rtx_LSHIFTRT (TImode
, src
,
4513 GEN_INT (int_mode
== DImode
? 64 : 96)))));
4514 if (int_mode
!= mode
)
4516 reg
= simplify_gen_subreg (mode
, reg
, int_mode
, 0);
4517 emit_move_insn (dst
, reg
);
4521 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4522 the address from SRC and SRC+16. Return a REG or CONST_INT that
4523 specifies how many bytes to rotate the loaded registers, plus any
4524 extra from EXTRA_ROTQBY. The address and rotate amounts are
4525 normalized to improve merging of loads and rotate computations. */
4527 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4529 rtx addr
= XEXP (src
, 0);
4530 rtx p0
, p1
, rot
, addr0
, addr1
;
4536 if (MEM_ALIGN (src
) >= 128)
4537 /* Address is already aligned; simply perform a TImode load. */ ;
4538 else if (GET_CODE (addr
) == PLUS
)
4541 aligned reg + aligned reg => lqx
4542 aligned reg + unaligned reg => lqx, rotqby
4543 aligned reg + aligned const => lqd
4544 aligned reg + unaligned const => lqd, rotqbyi
4545 unaligned reg + aligned reg => lqx, rotqby
4546 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4547 unaligned reg + aligned const => lqd, rotqby
4548 unaligned reg + unaligned const -> not allowed by legitimate address
4550 p0
= XEXP (addr
, 0);
4551 p1
= XEXP (addr
, 1);
4552 if (!reg_aligned_for_addr (p0
))
4554 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4556 rot
= gen_reg_rtx (SImode
);
4557 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4559 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4563 && INTVAL (p1
) * BITS_PER_UNIT
4564 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4566 rot
= gen_reg_rtx (SImode
);
4567 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4572 rtx x
= gen_reg_rtx (SImode
);
4573 emit_move_insn (x
, p1
);
4574 if (!spu_arith_operand (p1
, SImode
))
4576 rot
= gen_reg_rtx (SImode
);
4577 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4578 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4586 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4588 rot_amt
= INTVAL (p1
) & 15;
4589 if (INTVAL (p1
) & -16)
4591 p1
= GEN_INT (INTVAL (p1
) & -16);
4592 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4597 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4601 else if (REG_P (addr
))
4603 if (!reg_aligned_for_addr (addr
))
4606 else if (GET_CODE (addr
) == CONST
)
4608 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4609 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4610 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4612 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4614 addr
= gen_rtx_CONST (Pmode
,
4615 gen_rtx_PLUS (Pmode
,
4616 XEXP (XEXP (addr
, 0), 0),
4617 GEN_INT (rot_amt
& -16)));
4619 addr
= XEXP (XEXP (addr
, 0), 0);
4623 rot
= gen_reg_rtx (Pmode
);
4624 emit_move_insn (rot
, addr
);
4627 else if (GET_CODE (addr
) == CONST_INT
)
4629 rot_amt
= INTVAL (addr
);
4630 addr
= GEN_INT (rot_amt
& -16);
4632 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4634 rot
= gen_reg_rtx (Pmode
);
4635 emit_move_insn (rot
, addr
);
4638 rot_amt
+= extra_rotby
;
4644 rtx x
= gen_reg_rtx (SImode
);
4645 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4649 if (!rot
&& rot_amt
)
4650 rot
= GEN_INT (rot_amt
);
4652 addr0
= copy_rtx (addr
);
4653 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4654 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4658 addr1
= plus_constant (SImode
, copy_rtx (addr
), 16);
4659 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4660 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
4667 spu_split_load (rtx
* ops
)
4669 machine_mode mode
= GET_MODE (ops
[0]);
4670 rtx addr
, load
, rot
;
4673 if (GET_MODE_SIZE (mode
) >= 16)
4676 addr
= XEXP (ops
[1], 0);
4677 gcc_assert (GET_CODE (addr
) != AND
);
4679 if (!address_needs_split (ops
[1]))
4681 ops
[1] = change_address (ops
[1], TImode
, addr
);
4682 load
= gen_reg_rtx (TImode
);
4683 emit_insn (gen__movti (load
, ops
[1]));
4684 spu_convert_move (ops
[0], load
);
4688 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4690 load
= gen_reg_rtx (TImode
);
4691 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4694 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4696 spu_convert_move (ops
[0], load
);
4701 spu_split_store (rtx
* ops
)
4703 machine_mode mode
= GET_MODE (ops
[0]);
4705 rtx addr
, p0
, p1
, p1_lo
, smem
;
4709 if (GET_MODE_SIZE (mode
) >= 16)
4712 addr
= XEXP (ops
[0], 0);
4713 gcc_assert (GET_CODE (addr
) != AND
);
4715 if (!address_needs_split (ops
[0]))
4717 reg
= gen_reg_rtx (TImode
);
4718 emit_insn (gen_spu_convert (reg
, ops
[1]));
4719 ops
[0] = change_address (ops
[0], TImode
, addr
);
4720 emit_move_insn (ops
[0], reg
);
4724 if (GET_CODE (addr
) == PLUS
)
4727 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4728 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4729 aligned reg + aligned const => lqd, c?d, shuf, stqx
4730 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4731 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4732 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4733 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4734 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4737 p0
= XEXP (addr
, 0);
4738 p1
= p1_lo
= XEXP (addr
, 1);
4739 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4741 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4742 if (reg_aligned_for_addr (p0
))
4744 p1
= GEN_INT (INTVAL (p1
) & -16);
4745 if (p1
== const0_rtx
)
4748 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4752 rtx x
= gen_reg_rtx (SImode
);
4753 emit_move_insn (x
, p1
);
4754 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4758 else if (REG_P (addr
))
4762 p1
= p1_lo
= const0_rtx
;
4767 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4768 p1
= 0; /* aform doesn't use p1 */
4770 if (ALIGNED_SYMBOL_REF_P (addr
))
4772 else if (GET_CODE (addr
) == CONST
4773 && GET_CODE (XEXP (addr
, 0)) == PLUS
4774 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4775 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4777 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4779 addr
= gen_rtx_CONST (Pmode
,
4780 gen_rtx_PLUS (Pmode
,
4781 XEXP (XEXP (addr
, 0), 0),
4782 GEN_INT (v
& -16)));
4784 addr
= XEXP (XEXP (addr
, 0), 0);
4785 p1_lo
= GEN_INT (v
& 15);
4787 else if (GET_CODE (addr
) == CONST_INT
)
4789 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4790 addr
= GEN_INT (INTVAL (addr
) & -16);
4794 p1_lo
= gen_reg_rtx (SImode
);
4795 emit_move_insn (p1_lo
, addr
);
4799 gcc_assert (aform
== 0 || aform
== 1);
4800 reg
= gen_reg_rtx (TImode
);
4802 scalar
= store_with_one_insn_p (ops
[0]);
4805 /* We could copy the flags from the ops[0] MEM to mem here,
4806 We don't because we want this load to be optimized away if
4807 possible, and copying the flags will prevent that in certain
4808 cases, e.g. consider the volatile flag. */
4810 rtx pat
= gen_reg_rtx (TImode
);
4811 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4812 set_mem_alias_set (lmem
, 0);
4813 emit_insn (gen_movti (reg
, lmem
));
4815 if (!p0
|| reg_aligned_for_addr (p0
))
4816 p0
= stack_pointer_rtx
;
4820 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4821 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4825 if (GET_CODE (ops
[1]) == REG
)
4826 emit_insn (gen_spu_convert (reg
, ops
[1]));
4827 else if (GET_CODE (ops
[1]) == SUBREG
)
4828 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
4833 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4834 emit_insn (gen_ashlti3
4835 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
4837 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4838 /* We can't use the previous alias set because the memory has changed
4839 size and can potentially overlap objects of other types. */
4840 set_mem_alias_set (smem
, 0);
4842 emit_insn (gen_movti (smem
, reg
));
4846 /* Return TRUE if X is MEM which is a struct member reference
4847 and the member can safely be loaded and stored with a single
4848 instruction because it is padded. */
4850 mem_is_padded_component_ref (rtx x
)
4852 tree t
= MEM_EXPR (x
);
4854 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
4856 t
= TREE_OPERAND (t
, 1);
4857 if (!t
|| TREE_CODE (t
) != FIELD_DECL
4858 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
4860 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4861 r
= DECL_FIELD_CONTEXT (t
);
4862 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
4864 /* Make sure they are the same mode */
4865 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
4867 /* If there are no following fields then the field alignment assures
4868 the structure is padded to the alignment which means this field is
4870 if (TREE_CHAIN (t
) == 0)
4872 /* If the following field is also aligned then this field will be
4875 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
4880 /* Parse the -mfixed-range= option string. */
4882 fix_range (const char *const_str
)
4885 char *str
, *dash
, *comma
;
4887 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4888 REG2 are either register names or register numbers. The effect
4889 of this option is to mark the registers in the range from REG1 to
4890 REG2 as ``fixed'' so they won't be used by the compiler. */
4892 i
= strlen (const_str
);
4893 str
= (char *) alloca (i
+ 1);
4894 memcpy (str
, const_str
, i
+ 1);
4898 dash
= strchr (str
, '-');
4901 warning (0, "value of -mfixed-range must have form REG1-REG2");
4905 comma
= strchr (dash
+ 1, ',');
4909 first
= decode_reg_name (str
);
4912 warning (0, "unknown register name: %s", str
);
4916 last
= decode_reg_name (dash
+ 1);
4919 warning (0, "unknown register name: %s", dash
+ 1);
4927 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
4931 for (i
= first
; i
<= last
; ++i
)
4932 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4942 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4943 can be generated using the fsmbi instruction. */
4945 fsmbi_const_p (rtx x
)
4949 /* We can always choose TImode for CONST_INT because the high bits
4950 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4951 enum immediate_class c
= classify_immediate (x
, TImode
);
4952 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
4957 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4958 can be generated using the cbd, chd, cwd or cdd instruction. */
4960 cpat_const_p (rtx x
, machine_mode mode
)
4964 enum immediate_class c
= classify_immediate (x
, mode
);
4965 return c
== IC_CPAT
;
4971 gen_cpat_const (rtx
* ops
)
4973 unsigned char dst
[16];
4974 int i
, offset
, shift
, isize
;
4975 if (GET_CODE (ops
[3]) != CONST_INT
4976 || GET_CODE (ops
[2]) != CONST_INT
4977 || (GET_CODE (ops
[1]) != CONST_INT
4978 && GET_CODE (ops
[1]) != REG
))
4980 if (GET_CODE (ops
[1]) == REG
4981 && (!REG_POINTER (ops
[1])
4982 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
4985 for (i
= 0; i
< 16; i
++)
4987 isize
= INTVAL (ops
[3]);
4990 else if (isize
== 2)
4994 offset
= (INTVAL (ops
[2]) +
4995 (GET_CODE (ops
[1]) ==
4996 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
4997 for (i
= 0; i
< isize
; i
++)
4998 dst
[offset
+ i
] = i
+ shift
;
4999 return array_to_constant (TImode
, dst
);
5002 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5003 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5004 than 16 bytes, the value is repeated across the rest of the array. */
5006 constant_to_array (machine_mode mode
, rtx x
, unsigned char arr
[16])
5011 memset (arr
, 0, 16);
5012 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
5013 if (GET_CODE (x
) == CONST_INT
5014 || (GET_CODE (x
) == CONST_DOUBLE
5015 && (mode
== SFmode
|| mode
== DFmode
)))
5017 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
5019 if (GET_CODE (x
) == CONST_DOUBLE
)
5020 val
= const_double_to_hwint (x
);
5023 first
= GET_MODE_SIZE (mode
) - 1;
5024 for (i
= first
; i
>= 0; i
--)
5026 arr
[i
] = val
& 0xff;
5029 /* Splat the constant across the whole array. */
5030 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
5033 j
= (j
== first
) ? 0 : j
+ 1;
5036 else if (GET_CODE (x
) == CONST_DOUBLE
)
5038 val
= CONST_DOUBLE_LOW (x
);
5039 for (i
= 15; i
>= 8; i
--)
5041 arr
[i
] = val
& 0xff;
5044 val
= CONST_DOUBLE_HIGH (x
);
5045 for (i
= 7; i
>= 0; i
--)
5047 arr
[i
] = val
& 0xff;
5051 else if (GET_CODE (x
) == CONST_VECTOR
)
5055 mode
= GET_MODE_INNER (mode
);
5056 units
= CONST_VECTOR_NUNITS (x
);
5057 for (i
= 0; i
< units
; i
++)
5059 elt
= CONST_VECTOR_ELT (x
, i
);
5060 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
5062 if (GET_CODE (elt
) == CONST_DOUBLE
)
5063 val
= const_double_to_hwint (elt
);
5066 first
= GET_MODE_SIZE (mode
) - 1;
5067 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
5069 for (j
= first
; j
>= 0; j
--)
5071 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
5081 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5082 smaller than 16 bytes, use the bytes that would represent that value
5083 in a register, e.g., for QImode return the value of arr[3]. */
5085 array_to_constant (machine_mode mode
, const unsigned char arr
[16])
5087 machine_mode inner_mode
;
5089 int units
, size
, i
, j
, k
;
5092 if (GET_MODE_CLASS (mode
) == MODE_INT
5093 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
5095 j
= GET_MODE_SIZE (mode
);
5096 i
= j
< 4 ? 4 - j
: 0;
5097 for (val
= 0; i
< j
; i
++)
5098 val
= (val
<< 8) | arr
[i
];
5099 val
= trunc_int_for_mode (val
, mode
);
5100 return GEN_INT (val
);
5106 for (i
= high
= 0; i
< 8; i
++)
5107 high
= (high
<< 8) | arr
[i
];
5108 for (i
= 8, val
= 0; i
< 16; i
++)
5109 val
= (val
<< 8) | arr
[i
];
5110 return immed_double_const (val
, high
, TImode
);
5114 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5115 val
= trunc_int_for_mode (val
, SImode
);
5116 return hwint_to_const_double (SFmode
, val
);
5120 for (i
= 0, val
= 0; i
< 8; i
++)
5121 val
= (val
<< 8) | arr
[i
];
5122 return hwint_to_const_double (DFmode
, val
);
5125 if (!VECTOR_MODE_P (mode
))
5128 units
= GET_MODE_NUNITS (mode
);
5129 size
= GET_MODE_UNIT_SIZE (mode
);
5130 inner_mode
= GET_MODE_INNER (mode
);
5131 v
= rtvec_alloc (units
);
5133 for (k
= i
= 0; i
< units
; ++i
)
5136 for (j
= 0; j
< size
; j
++, k
++)
5137 val
= (val
<< 8) | arr
[k
];
5139 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
5140 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
5142 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
5147 return gen_rtx_CONST_VECTOR (mode
, v
);
5151 reloc_diagnostic (rtx x
)
5154 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
5157 if (GET_CODE (x
) == SYMBOL_REF
)
5158 decl
= SYMBOL_REF_DECL (x
);
5159 else if (GET_CODE (x
) == CONST
5160 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5161 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
5163 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5164 if (decl
&& !DECL_P (decl
))
5167 /* The decl could be a string constant. */
5168 if (decl
&& DECL_P (decl
))
5171 /* We use last_assemble_variable_decl to get line information. It's
5172 not always going to be right and might not even be close, but will
5173 be right for the more common cases. */
5174 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
5175 loc
= DECL_SOURCE_LOCATION (decl
);
5177 loc
= DECL_SOURCE_LOCATION (last_assemble_variable_decl
);
5179 if (TARGET_WARN_RELOC
)
5181 "creating run-time relocation for %qD", decl
);
5184 "creating run-time relocation for %qD", decl
);
5188 if (TARGET_WARN_RELOC
)
5189 warning_at (input_location
, 0, "creating run-time relocation");
5191 error_at (input_location
, "creating run-time relocation");
5195 /* Hook into assemble_integer so we can generate an error for run-time
5196 relocations. The SPU ABI disallows them. */
5198 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
5200 /* By default run-time relocations aren't supported, but we allow them
5201 in case users support it in their own run-time loader. And we provide
5202 a warning for those users that don't. */
5203 if ((GET_CODE (x
) == SYMBOL_REF
)
5204 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
5205 reloc_diagnostic (x
);
5207 return default_assemble_integer (x
, size
, aligned_p
);
5211 spu_asm_globalize_label (FILE * file
, const char *name
)
5213 fputs ("\t.global\t", file
);
5214 assemble_name (file
, name
);
5219 spu_rtx_costs (rtx x
, machine_mode mode
, int outer_code ATTRIBUTE_UNUSED
,
5220 int opno ATTRIBUTE_UNUSED
, int *total
,
5221 bool speed ATTRIBUTE_UNUSED
)
5223 int code
= GET_CODE (x
);
5224 int cost
= COSTS_N_INSNS (2);
5226 /* Folding to a CONST_VECTOR will use extra space but there might
5227 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5228 only if it allows us to fold away multiple insns. Changing the cost
5229 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5230 because this cost will only be compared against a single insn.
5231 if (code == CONST_VECTOR)
5232 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5235 /* Use defaults for float operations. Not accurate but good enough. */
5238 *total
= COSTS_N_INSNS (13);
5243 *total
= COSTS_N_INSNS (6);
5249 if (satisfies_constraint_K (x
))
5251 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5252 *total
= COSTS_N_INSNS (1);
5254 *total
= COSTS_N_INSNS (3);
5258 *total
= COSTS_N_INSNS (3);
5263 *total
= COSTS_N_INSNS (0);
5267 *total
= COSTS_N_INSNS (5);
5271 case FLOAT_TRUNCATE
:
5273 case UNSIGNED_FLOAT
:
5276 *total
= COSTS_N_INSNS (7);
5282 *total
= COSTS_N_INSNS (9);
5289 GET_CODE (XEXP (x
, 0)) ==
5290 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5291 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5293 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5295 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5296 cost
= COSTS_N_INSNS (14);
5297 if ((val
& 0xffff) == 0)
5298 cost
= COSTS_N_INSNS (9);
5299 else if (val
> 0 && val
< 0x10000)
5300 cost
= COSTS_N_INSNS (11);
5309 *total
= COSTS_N_INSNS (20);
5316 *total
= COSTS_N_INSNS (4);
5319 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5320 *total
= COSTS_N_INSNS (0);
5322 *total
= COSTS_N_INSNS (4);
5325 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5326 if (GET_MODE_CLASS (mode
) == MODE_INT
5327 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5328 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5329 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5334 static scalar_int_mode
5335 spu_unwind_word_mode (void)
5340 /* Decide whether we can make a sibling call to a function. DECL is the
5341 declaration of the function being targeted by the call and EXP is the
5342 CALL_EXPR representing the call. */
5344 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5346 return decl
&& !TARGET_LARGE_MEM
;
5349 /* We need to correctly update the back chain pointer and the Available
5350 Stack Size (which is in the second slot of the sp register.) */
5352 spu_allocate_stack (rtx op0
, rtx op1
)
5355 rtx chain
= gen_reg_rtx (V4SImode
);
5356 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5357 rtx sp
= gen_reg_rtx (V4SImode
);
5358 rtx splatted
= gen_reg_rtx (V4SImode
);
5359 rtx pat
= gen_reg_rtx (TImode
);
5361 /* copy the back chain so we can save it back again. */
5362 emit_move_insn (chain
, stack_bot
);
5364 op1
= force_reg (SImode
, op1
);
5366 v
= 0x1020300010203ll
;
5367 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5368 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5370 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5371 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
5373 if (flag_stack_check
)
5375 rtx avail
= gen_reg_rtx(SImode
);
5376 rtx result
= gen_reg_rtx(SImode
);
5377 emit_insn (gen_vec_extractv4sisi (avail
, sp
, GEN_INT (1)));
5378 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5379 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5382 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
5384 emit_move_insn (stack_bot
, chain
);
5386 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
5390 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5392 static unsigned char arr
[16] =
5393 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5394 rtx temp
= gen_reg_rtx (SImode
);
5395 rtx temp2
= gen_reg_rtx (SImode
);
5396 rtx temp3
= gen_reg_rtx (V4SImode
);
5397 rtx temp4
= gen_reg_rtx (V4SImode
);
5398 rtx pat
= gen_reg_rtx (TImode
);
5399 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5401 /* Restore the backchain from the first word, sp from the second. */
5402 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5403 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5405 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5407 /* Compute Available Stack Size for sp */
5408 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5409 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5411 /* Compute Available Stack Size for back chain */
5412 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5413 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5414 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
5416 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5417 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
5421 spu_init_libfuncs (void)
5423 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
5424 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
5425 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
5426 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
5427 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
5428 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
5429 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
5430 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
5431 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
5432 set_optab_libfunc (clrsb_optab
, DImode
, "__clrsbdi2");
5433 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
5434 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
5436 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
5437 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
5439 set_optab_libfunc (addv_optab
, SImode
, "__addvsi3");
5440 set_optab_libfunc (subv_optab
, SImode
, "__subvsi3");
5441 set_optab_libfunc (smulv_optab
, SImode
, "__mulvsi3");
5442 set_optab_libfunc (sdivv_optab
, SImode
, "__divvsi3");
5443 set_optab_libfunc (negv_optab
, SImode
, "__negvsi2");
5444 set_optab_libfunc (absv_optab
, SImode
, "__absvsi2");
5445 set_optab_libfunc (addv_optab
, DImode
, "__addvdi3");
5446 set_optab_libfunc (subv_optab
, DImode
, "__subvdi3");
5447 set_optab_libfunc (smulv_optab
, DImode
, "__mulvdi3");
5448 set_optab_libfunc (sdivv_optab
, DImode
, "__divvdi3");
5449 set_optab_libfunc (negv_optab
, DImode
, "__negvdi2");
5450 set_optab_libfunc (absv_optab
, DImode
, "__absvdi2");
5452 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
5453 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
5454 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
5455 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
5456 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
5457 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
5460 /* Make a subreg, stripping any existing subreg. We could possibly just
5461 call simplify_subreg, but in this case we know what we want. */
5463 spu_gen_subreg (machine_mode mode
, rtx x
)
5465 if (GET_CODE (x
) == SUBREG
)
5467 if (GET_MODE (x
) == mode
)
5469 return gen_rtx_SUBREG (mode
, x
, 0);
5473 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5475 return (TYPE_MODE (type
) == BLKmode
5477 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5478 || int_size_in_bytes (type
) >
5479 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
5482 /* Create the built-in types and functions */
5484 enum spu_function_code
5486 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5487 #include "spu-builtins.def"
5492 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5494 struct spu_builtin_description spu_builtins
[] = {
5495 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5496 {fcode, icode, name, type, params},
5497 #include "spu-builtins.def"
5501 static GTY(()) tree spu_builtin_decls
[NUM_SPU_BUILTINS
];
5503 /* Returns the spu builtin decl for CODE. */
5506 spu_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
5508 if (code
>= NUM_SPU_BUILTINS
)
5509 return error_mark_node
;
5511 return spu_builtin_decls
[code
];
5516 spu_init_builtins (void)
5518 struct spu_builtin_description
*d
;
5521 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5522 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5523 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5524 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5525 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5526 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5528 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5529 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5530 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5531 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5533 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5535 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5536 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5537 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5538 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5539 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5540 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5541 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5542 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5543 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5544 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5545 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5546 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5548 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5549 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5550 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5551 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5552 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5553 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5554 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5555 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5557 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5558 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5560 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5562 spu_builtin_types
[SPU_BTI_PTR
] =
5563 build_pointer_type (build_qualified_type
5565 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5567 /* For each builtin we build a new prototype. The tree code will make
5568 sure nodes are shared. */
5569 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5572 char name
[64]; /* build_function will make a copy. */
5578 /* Find last parm. */
5579 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5584 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5586 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5588 sprintf (name
, "__builtin_%s", d
->name
);
5589 spu_builtin_decls
[i
] =
5590 add_builtin_function (name
, p
, i
, BUILT_IN_MD
, NULL
, NULL_TREE
);
5591 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5592 TREE_READONLY (spu_builtin_decls
[i
]) = 1;
5594 /* These builtins don't throw. */
5595 TREE_NOTHROW (spu_builtin_decls
[i
]) = 1;
5600 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5602 static unsigned char arr
[16] =
5603 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5605 rtx temp
= gen_reg_rtx (Pmode
);
5606 rtx temp2
= gen_reg_rtx (V4SImode
);
5607 rtx temp3
= gen_reg_rtx (V4SImode
);
5608 rtx pat
= gen_reg_rtx (TImode
);
5609 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5611 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5613 /* Restore the sp. */
5614 emit_move_insn (temp
, op1
);
5615 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5617 /* Compute available stack size for sp. */
5618 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5619 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5621 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5622 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
5626 spu_safe_dma (HOST_WIDE_INT channel
)
5628 return TARGET_SAFE_DMA
&& channel
>= 21 && channel
<= 27;
5632 spu_builtin_splats (rtx ops
[])
5634 machine_mode mode
= GET_MODE (ops
[0]);
5635 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5637 unsigned char arr
[16];
5638 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5639 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5643 rtx reg
= gen_reg_rtx (TImode
);
5645 if (GET_CODE (ops
[1]) != REG
5646 && GET_CODE (ops
[1]) != SUBREG
)
5647 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5653 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5659 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5664 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5669 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5675 emit_move_insn (reg
, shuf
);
5676 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
5681 spu_builtin_extract (rtx ops
[])
5686 mode
= GET_MODE (ops
[1]);
5688 if (GET_CODE (ops
[2]) == CONST_INT
)
5693 emit_insn (gen_vec_extractv16qiqi (ops
[0], ops
[1], ops
[2]));
5696 emit_insn (gen_vec_extractv8hihi (ops
[0], ops
[1], ops
[2]));
5699 emit_insn (gen_vec_extractv4sfsf (ops
[0], ops
[1], ops
[2]));
5702 emit_insn (gen_vec_extractv4sisi (ops
[0], ops
[1], ops
[2]));
5705 emit_insn (gen_vec_extractv2didi (ops
[0], ops
[1], ops
[2]));
5708 emit_insn (gen_vec_extractv2dfdf (ops
[0], ops
[1], ops
[2]));
5716 from
= spu_gen_subreg (TImode
, ops
[1]);
5717 rot
= gen_reg_rtx (TImode
);
5718 tmp
= gen_reg_rtx (SImode
);
5723 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5726 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5727 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5731 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5735 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5740 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5742 emit_insn (gen_spu_convert (ops
[0], rot
));
5746 spu_builtin_insert (rtx ops
[])
5748 machine_mode mode
= GET_MODE (ops
[0]);
5749 machine_mode imode
= GET_MODE_INNER (mode
);
5750 rtx mask
= gen_reg_rtx (TImode
);
5753 if (GET_CODE (ops
[3]) == CONST_INT
)
5754 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5757 offset
= gen_reg_rtx (SImode
);
5758 emit_insn (gen_mulsi3
5759 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5762 (mask
, stack_pointer_rtx
, offset
,
5763 GEN_INT (GET_MODE_SIZE (imode
))));
5764 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5768 spu_builtin_promote (rtx ops
[])
5770 machine_mode mode
, imode
;
5771 rtx rot
, from
, offset
;
5774 mode
= GET_MODE (ops
[0]);
5775 imode
= GET_MODE_INNER (mode
);
5777 from
= gen_reg_rtx (TImode
);
5778 rot
= spu_gen_subreg (TImode
, ops
[0]);
5780 emit_insn (gen_spu_convert (from
, ops
[1]));
5782 if (GET_CODE (ops
[2]) == CONST_INT
)
5784 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5785 if (GET_MODE_SIZE (imode
) < 4)
5786 pos
+= 4 - GET_MODE_SIZE (imode
);
5787 offset
= GEN_INT (pos
& 15);
5791 offset
= gen_reg_rtx (SImode
);
5795 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5798 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5799 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5803 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5804 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5808 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5814 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5818 spu_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
5820 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5821 rtx shuf
= gen_reg_rtx (V4SImode
);
5822 rtx insn
= gen_reg_rtx (V4SImode
);
5827 fnaddr
= force_reg (SImode
, fnaddr
);
5828 cxt
= force_reg (SImode
, cxt
);
5830 if (TARGET_LARGE_MEM
)
5832 rtx rotl
= gen_reg_rtx (V4SImode
);
5833 rtx mask
= gen_reg_rtx (V4SImode
);
5834 rtx bi
= gen_reg_rtx (SImode
);
5835 static unsigned char const shufa
[16] = {
5836 2, 3, 0, 1, 18, 19, 16, 17,
5837 0, 1, 2, 3, 16, 17, 18, 19
5839 static unsigned char const insna
[16] = {
5841 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5843 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5846 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5847 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5849 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5850 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5851 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5852 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5854 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5855 emit_move_insn (mem
, insn
);
5857 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5858 mem
= adjust_address (m_tramp
, Pmode
, 16);
5859 emit_move_insn (mem
, bi
);
5863 rtx scxt
= gen_reg_rtx (SImode
);
5864 rtx sfnaddr
= gen_reg_rtx (SImode
);
5865 static unsigned char const insna
[16] = {
5866 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5872 shufc
= gen_reg_rtx (TImode
);
5873 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5875 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5876 fits 18 bits and the last 4 are zeros. This will be true if
5877 the stack pointer is initialized to 0x3fff0 at program start,
5878 otherwise the ila instruction will be garbage. */
5880 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5881 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5883 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5884 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5885 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5887 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5888 emit_move_insn (mem
, insn
);
5890 emit_insn (gen_sync ());
5894 spu_warn_func_return (tree decl
)
5896 /* Naked functions are implemented entirely in assembly, including the
5897 return sequence, so suppress warnings about this. */
5898 return !spu_naked_function_p (decl
);
5902 spu_expand_sign_extend (rtx ops
[])
5904 unsigned char arr
[16];
5905 rtx pat
= gen_reg_rtx (TImode
);
5908 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5909 if (GET_MODE (ops
[1]) == QImode
)
5911 sign
= gen_reg_rtx (HImode
);
5912 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5913 for (i
= 0; i
< 16; i
++)
5919 for (i
= 0; i
< 16; i
++)
5921 switch (GET_MODE (ops
[1]))
5924 sign
= gen_reg_rtx (SImode
);
5925 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5927 arr
[last
- 1] = 0x02;
5930 sign
= gen_reg_rtx (SImode
);
5931 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5932 for (i
= 0; i
< 4; i
++)
5933 arr
[last
- i
] = 3 - i
;
5936 sign
= gen_reg_rtx (SImode
);
5937 c
= gen_reg_rtx (SImode
);
5938 emit_insn (gen_spu_convert (c
, ops
[1]));
5939 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5940 for (i
= 0; i
< 8; i
++)
5941 arr
[last
- i
] = 7 - i
;
5947 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5948 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
5951 /* expand vector initialization. If there are any constant parts,
5952 load constant parts first. Then load any non-constant parts. */
5954 spu_expand_vector_init (rtx target
, rtx vals
)
5956 machine_mode mode
= GET_MODE (target
);
5957 int n_elts
= GET_MODE_NUNITS (mode
);
5959 bool all_same
= true;
5960 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5963 first
= XVECEXP (vals
, 0, 0);
5964 for (i
= 0; i
< n_elts
; ++i
)
5966 x
= XVECEXP (vals
, 0, i
);
5967 if (!(CONST_INT_P (x
)
5968 || GET_CODE (x
) == CONST_DOUBLE
5969 || GET_CODE (x
) == CONST_FIXED
))
5973 if (first_constant
== NULL_RTX
)
5976 if (i
> 0 && !rtx_equal_p (x
, first
))
5980 /* if all elements are the same, use splats to repeat elements */
5983 if (!CONSTANT_P (first
)
5984 && !register_operand (first
, GET_MODE (x
)))
5985 first
= force_reg (GET_MODE (first
), first
);
5986 emit_insn (gen_spu_splats (target
, first
));
5990 /* load constant parts */
5991 if (n_var
!= n_elts
)
5995 emit_move_insn (target
,
5996 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
6000 rtx constant_parts_rtx
= copy_rtx (vals
);
6002 gcc_assert (first_constant
!= NULL_RTX
);
6003 /* fill empty slots with the first constant, this increases
6004 our chance of using splats in the recursive call below. */
6005 for (i
= 0; i
< n_elts
; ++i
)
6007 x
= XVECEXP (constant_parts_rtx
, 0, i
);
6008 if (!(CONST_INT_P (x
)
6009 || GET_CODE (x
) == CONST_DOUBLE
6010 || GET_CODE (x
) == CONST_FIXED
))
6011 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
6014 spu_expand_vector_init (target
, constant_parts_rtx
);
6018 /* load variable parts */
6021 rtx insert_operands
[4];
6023 insert_operands
[0] = target
;
6024 insert_operands
[2] = target
;
6025 for (i
= 0; i
< n_elts
; ++i
)
6027 x
= XVECEXP (vals
, 0, i
);
6028 if (!(CONST_INT_P (x
)
6029 || GET_CODE (x
) == CONST_DOUBLE
6030 || GET_CODE (x
) == CONST_FIXED
))
6032 if (!register_operand (x
, GET_MODE (x
)))
6033 x
= force_reg (GET_MODE (x
), x
);
6034 insert_operands
[1] = x
;
6035 insert_operands
[3] = GEN_INT (i
);
6036 spu_builtin_insert (insert_operands
);
6042 /* Return insn index for the vector compare instruction for given CODE,
6043 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6046 get_vec_cmp_insn (enum rtx_code code
,
6047 machine_mode dest_mode
,
6048 machine_mode op_mode
)
6054 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6055 return CODE_FOR_ceq_v16qi
;
6056 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6057 return CODE_FOR_ceq_v8hi
;
6058 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6059 return CODE_FOR_ceq_v4si
;
6060 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6061 return CODE_FOR_ceq_v4sf
;
6062 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6063 return CODE_FOR_ceq_v2df
;
6066 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6067 return CODE_FOR_cgt_v16qi
;
6068 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6069 return CODE_FOR_cgt_v8hi
;
6070 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6071 return CODE_FOR_cgt_v4si
;
6072 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6073 return CODE_FOR_cgt_v4sf
;
6074 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6075 return CODE_FOR_cgt_v2df
;
6078 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6079 return CODE_FOR_clgt_v16qi
;
6080 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6081 return CODE_FOR_clgt_v8hi
;
6082 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6083 return CODE_FOR_clgt_v4si
;
6091 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6092 DMODE is expected destination mode. This is a recursive function. */
6095 spu_emit_vector_compare (enum rtx_code rcode
,
6101 machine_mode dest_mode
;
6102 machine_mode op_mode
= GET_MODE (op1
);
6104 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
6106 /* Floating point vector compare instructions uses destination V4SImode.
6107 Double floating point vector compare instructions uses destination V2DImode.
6108 Move destination to appropriate mode later. */
6109 if (dmode
== V4SFmode
)
6110 dest_mode
= V4SImode
;
6111 else if (dmode
== V2DFmode
)
6112 dest_mode
= V2DImode
;
6116 mask
= gen_reg_rtx (dest_mode
);
6117 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6119 if (vec_cmp_insn
== -1)
6121 bool swap_operands
= false;
6122 bool try_again
= false;
6127 swap_operands
= true;
6132 swap_operands
= true;
6142 /* Treat A != B as ~(A==B). */
6144 enum rtx_code rev_code
;
6145 enum insn_code nor_code
;
6148 rev_code
= reverse_condition_maybe_unordered (rcode
);
6149 rev_mask
= spu_emit_vector_compare (rev_code
, op0
, op1
, dest_mode
);
6151 nor_code
= optab_handler (one_cmpl_optab
, dest_mode
);
6152 gcc_assert (nor_code
!= CODE_FOR_nothing
);
6153 emit_insn (GEN_FCN (nor_code
) (mask
, rev_mask
));
6154 if (dmode
!= dest_mode
)
6156 rtx temp
= gen_reg_rtx (dest_mode
);
6157 convert_move (temp
, mask
, 0);
6167 /* Try GT/GTU/LT/LTU OR EQ */
6170 enum insn_code ior_code
;
6171 enum rtx_code new_code
;
6175 case GE
: new_code
= GT
; break;
6176 case GEU
: new_code
= GTU
; break;
6177 case LE
: new_code
= LT
; break;
6178 case LEU
: new_code
= LTU
; break;
6183 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
6184 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6186 ior_code
= optab_handler (ior_optab
, dest_mode
);
6187 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6188 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
6189 if (dmode
!= dest_mode
)
6191 rtx temp
= gen_reg_rtx (dest_mode
);
6192 convert_move (temp
, mask
, 0);
6202 enum insn_code ior_code
;
6204 lt_rtx
= spu_emit_vector_compare (LT
, op0
, op1
, dest_mode
);
6205 gt_rtx
= spu_emit_vector_compare (GT
, op0
, op1
, dest_mode
);
6207 ior_code
= optab_handler (ior_optab
, dest_mode
);
6208 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6209 emit_insn (GEN_FCN (ior_code
) (mask
, lt_rtx
, gt_rtx
));
6210 if (dmode
!= dest_mode
)
6212 rtx temp
= gen_reg_rtx (dest_mode
);
6213 convert_move (temp
, mask
, 0);
6220 /* Implement as (A==A) & (B==B) */
6223 enum insn_code and_code
;
6225 a_rtx
= spu_emit_vector_compare (EQ
, op0
, op0
, dest_mode
);
6226 b_rtx
= spu_emit_vector_compare (EQ
, op1
, op1
, dest_mode
);
6228 and_code
= optab_handler (and_optab
, dest_mode
);
6229 gcc_assert (and_code
!= CODE_FOR_nothing
);
6230 emit_insn (GEN_FCN (and_code
) (mask
, a_rtx
, b_rtx
));
6231 if (dmode
!= dest_mode
)
6233 rtx temp
= gen_reg_rtx (dest_mode
);
6234 convert_move (temp
, mask
, 0);
6244 /* You only get two chances. */
6246 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6248 gcc_assert (vec_cmp_insn
!= -1);
6259 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
6260 if (dmode
!= dest_mode
)
6262 rtx temp
= gen_reg_rtx (dest_mode
);
6263 convert_move (temp
, mask
, 0);
6270 /* Emit vector conditional expression.
6271 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6272 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6275 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
6276 rtx cond
, rtx cc_op0
, rtx cc_op1
)
6278 machine_mode dest_mode
= GET_MODE (dest
);
6279 enum rtx_code rcode
= GET_CODE (cond
);
6282 /* Get the vector mask for the given relational operations. */
6283 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
6285 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
6291 spu_force_reg (machine_mode mode
, rtx op
)
6294 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
6296 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
6297 || GET_MODE (op
) == BLKmode
)
6298 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
6302 r
= force_reg (GET_MODE (op
), op
);
6303 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6305 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6310 x
= gen_reg_rtx (mode
);
6311 emit_insn (gen_spu_convert (x
, r
));
6316 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6318 HOST_WIDE_INT v
= 0;
6320 /* Check the range of immediate operands. */
6321 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6323 int range
= p
- SPU_BTI_7
;
6325 if (!CONSTANT_P (op
))
6326 error ("%s expects an integer literal in the range [%d, %d]",
6328 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6330 if (GET_CODE (op
) == CONST
6331 && (GET_CODE (XEXP (op
, 0)) == PLUS
6332 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6334 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6335 op
= XEXP (XEXP (op
, 0), 0);
6337 else if (GET_CODE (op
) == CONST_INT
)
6339 else if (GET_CODE (op
) == CONST_VECTOR
6340 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6341 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6343 /* The default for v is 0 which is valid in every range. */
6344 if (v
< spu_builtin_range
[range
].low
6345 || v
> spu_builtin_range
[range
].high
)
6346 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6348 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6357 /* This is only used in lqa, and stqa. Even though the insns
6358 encode 16 bits of the address (all but the 2 least
6359 significant), only 14 bits are used because it is masked to
6360 be 16 byte aligned. */
6364 /* This is used for lqr and stqr. */
6371 if (GET_CODE (op
) == LABEL_REF
6372 || (GET_CODE (op
) == SYMBOL_REF
6373 && SYMBOL_REF_FUNCTION_P (op
))
6374 || (v
& ((1 << lsbits
) - 1)) != 0)
6375 warning (0, "%d least significant bits of %s are ignored", lsbits
,
6382 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6383 rtx target
, rtx ops
[])
6385 enum insn_code icode
= (enum insn_code
) d
->icode
;
6388 /* Expand the arguments into rtl. */
6390 if (d
->parm
[0] != SPU_BTI_VOID
)
6393 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6395 tree arg
= CALL_EXPR_ARG (exp
, a
);
6398 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6401 gcc_assert (i
== insn_data
[icode
].n_generator_args
);
6406 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6407 tree exp
, rtx target
)
6411 enum insn_code icode
= (enum insn_code
) d
->icode
;
6412 machine_mode mode
, tmode
;
6417 /* Set up ops[] with values from arglist. */
6418 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6420 /* Handle the target operand which must be operand 0. */
6422 if (d
->parm
[0] != SPU_BTI_VOID
)
6425 /* We prefer the mode specified for the match_operand otherwise
6426 use the mode from the builtin function prototype. */
6427 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6428 if (tmode
== VOIDmode
)
6429 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6431 /* Try to use target because not using it can lead to extra copies
6432 and when we are using all of the registers extra copies leads
6434 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6437 target
= ops
[0] = gen_reg_rtx (tmode
);
6439 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6445 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6447 machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6452 arg
= CALL_EXPR_ARG (exp
, 0);
6453 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
6454 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6455 addr
= memory_address (mode
, op
);
6458 op
= gen_reg_rtx (GET_MODE (addr
));
6459 emit_insn (gen_rtx_SET (op
, gen_rtx_NEG (GET_MODE (addr
), addr
)));
6460 op
= gen_rtx_MEM (mode
, op
);
6462 pat
= GEN_FCN (icode
) (target
, op
);
6469 /* Ignore align_hint, but still expand it's args in case they have
6471 if (icode
== CODE_FOR_spu_align_hint
)
6474 /* Handle the rest of the operands. */
6475 for (p
= 1; i
< n_operands
; i
++, p
++)
6477 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6478 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6480 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6482 /* mode can be VOIDmode here for labels */
6484 /* For specific intrinsics with an immediate operand, e.g.,
6485 si_ai(), we sometimes need to convert the scalar argument to a
6486 vector argument by splatting the scalar. */
6487 if (VECTOR_MODE_P (mode
)
6488 && (GET_CODE (ops
[i
]) == CONST_INT
6489 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6490 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6492 if (GET_CODE (ops
[i
]) == CONST_INT
)
6493 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6496 rtx reg
= gen_reg_rtx (mode
);
6497 machine_mode imode
= GET_MODE_INNER (mode
);
6498 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6499 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6500 if (imode
!= GET_MODE (ops
[i
]))
6501 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6502 TYPE_UNSIGNED (spu_builtin_types
6504 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6509 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6511 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6512 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6518 pat
= GEN_FCN (icode
) (0);
6521 pat
= GEN_FCN (icode
) (ops
[0]);
6524 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6527 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6530 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6533 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6536 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6545 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6546 emit_call_insn (pat
);
6547 else if (d
->type
== B_JUMP
)
6549 emit_jump_insn (pat
);
6555 return_type
= spu_builtin_types
[d
->parm
[0]];
6556 if (d
->parm
[0] != SPU_BTI_VOID
6557 && GET_MODE (target
) != TYPE_MODE (return_type
))
6559 /* target is the return value. It should always be the mode of
6560 the builtin function prototype. */
6561 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6568 spu_expand_builtin (tree exp
,
6570 rtx subtarget ATTRIBUTE_UNUSED
,
6571 machine_mode mode ATTRIBUTE_UNUSED
,
6572 int ignore ATTRIBUTE_UNUSED
)
6574 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6575 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
6576 struct spu_builtin_description
*d
;
6578 if (fcode
< NUM_SPU_BUILTINS
)
6580 d
= &spu_builtins
[fcode
];
6582 return spu_expand_builtin_1 (d
, exp
, target
);
6587 /* Implement targetm.vectorize.builtin_mask_for_load. */
6589 spu_builtin_mask_for_load (void)
6591 return spu_builtin_decls
[SPU_MASK_FOR_LOAD
];
6594 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6596 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6598 int misalign ATTRIBUTE_UNUSED
)
6602 switch (type_of_cost
)
6610 case cond_branch_not_taken
:
6612 case vec_promote_demote
:
6619 /* Load + rotate. */
6622 case unaligned_load
:
6625 case cond_branch_taken
:
6629 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6630 return elements
/ 2 + 1;
6637 /* Implement targetm.vectorize.init_cost. */
6640 spu_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
6642 unsigned *cost
= XNEWVEC (unsigned, 3);
6643 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
6647 /* Implement targetm.vectorize.add_stmt_cost. */
6650 spu_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6651 struct _stmt_vec_info
*stmt_info
, int misalign
,
6652 enum vect_cost_model_location where
)
6654 unsigned *cost
= (unsigned *) data
;
6655 unsigned retval
= 0;
6657 if (flag_vect_cost_model
)
6659 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6660 int stmt_cost
= spu_builtin_vectorization_cost (kind
, vectype
, misalign
);
6662 /* Statements in an inner loop relative to the loop being
6663 vectorized are weighted more heavily. The value here is
6664 arbitrary and could potentially be improved with analysis. */
6665 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6666 count
*= 50; /* FIXME. */
6668 retval
= (unsigned) (count
* stmt_cost
);
6669 cost
[where
] += retval
;
6675 /* Implement targetm.vectorize.finish_cost. */
6678 spu_finish_cost (void *data
, unsigned *prologue_cost
,
6679 unsigned *body_cost
, unsigned *epilogue_cost
)
6681 unsigned *cost
= (unsigned *) data
;
6682 *prologue_cost
= cost
[vect_prologue
];
6683 *body_cost
= cost
[vect_body
];
6684 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data: release the accumulator
   allocated by spu_init_cost.  */
static void
spu_destroy_cost_data (void *data)
{
  free (data);
}
6695 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6696 after applying N number of iterations. This routine does not determine
6697 how may iterations are required to reach desired alignment. */
6700 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6705 /* All other types are naturally aligned. */
6709 /* Return the appropriate mode for a named address pointer. */
6710 static scalar_int_mode
6711 spu_addr_space_pointer_mode (addr_space_t addrspace
)
6715 case ADDR_SPACE_GENERIC
:
6724 /* Return the appropriate mode for a named address address. */
6725 static scalar_int_mode
6726 spu_addr_space_address_mode (addr_space_t addrspace
)
6730 case ADDR_SPACE_GENERIC
:
6739 /* Determine if one named address space is a subset of another. */
6742 spu_addr_space_subset_p (addr_space_t subset
, addr_space_t superset
)
6744 gcc_assert (subset
== ADDR_SPACE_GENERIC
|| subset
== ADDR_SPACE_EA
);
6745 gcc_assert (superset
== ADDR_SPACE_GENERIC
|| superset
== ADDR_SPACE_EA
);
6747 if (subset
== superset
)
6750 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6751 being subsets but instead as disjoint address spaces. */
6752 else if (!TARGET_ADDRESS_SPACE_CONVERSION
)
6756 return (subset
== ADDR_SPACE_GENERIC
&& superset
== ADDR_SPACE_EA
);
6759 /* Convert from one address space to another. */
6761 spu_addr_space_convert (rtx op
, tree from_type
, tree to_type
)
6763 addr_space_t from_as
= TYPE_ADDR_SPACE (TREE_TYPE (from_type
));
6764 addr_space_t to_as
= TYPE_ADDR_SPACE (TREE_TYPE (to_type
));
6766 gcc_assert (from_as
== ADDR_SPACE_GENERIC
|| from_as
== ADDR_SPACE_EA
);
6767 gcc_assert (to_as
== ADDR_SPACE_GENERIC
|| to_as
== ADDR_SPACE_EA
);
6769 if (to_as
== ADDR_SPACE_GENERIC
&& from_as
== ADDR_SPACE_EA
)
6773 ls
= gen_const_mem (DImode
,
6774 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6775 set_mem_align (ls
, 128);
6777 result
= gen_reg_rtx (Pmode
);
6778 ls
= force_reg (Pmode
, convert_modes (Pmode
, DImode
, ls
, 1));
6779 op
= force_reg (Pmode
, convert_modes (Pmode
, EAmode
, op
, 1));
6780 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6781 ls
, const0_rtx
, Pmode
, 1);
6783 emit_insn (gen_subsi3 (result
, op
, ls
));
6788 else if (to_as
== ADDR_SPACE_EA
&& from_as
== ADDR_SPACE_GENERIC
)
6792 ls
= gen_const_mem (DImode
,
6793 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6794 set_mem_align (ls
, 128);
6796 result
= gen_reg_rtx (EAmode
);
6797 ls
= force_reg (EAmode
, convert_modes (EAmode
, DImode
, ls
, 1));
6798 op
= force_reg (Pmode
, op
);
6799 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6800 ls
, const0_rtx
, EAmode
, 1);
6801 op
= force_reg (EAmode
, convert_modes (EAmode
, Pmode
, op
, 1));
6803 if (EAmode
== SImode
)
6804 emit_insn (gen_addsi3 (result
, op
, ls
));
6806 emit_insn (gen_adddi3 (result
, op
, ls
));
6816 /* Count the total number of instructions in each pipe and return the
6817 maximum, which is used as the Minimum Iteration Interval (MII)
6818 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6819 -2 are instructions that can go in pipe0 or pipe1. */
6821 spu_sms_res_mii (struct ddg
*g
)
6824 unsigned t
[4] = {0, 0, 0, 0};
6826 for (i
= 0; i
< g
->num_nodes
; i
++)
6828 rtx_insn
*insn
= g
->nodes
[i
].insn
;
6829 int p
= get_pipe (insn
) + 2;
6831 gcc_assert (p
>= 0);
6835 if (dump_file
&& INSN_P (insn
))
6836 fprintf (dump_file
, "i%d %s %d %d\n",
6838 insn_data
[INSN_CODE(insn
)].name
,
6842 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
6844 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
6849 spu_init_expanders (void)
6854 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6855 frame_pointer_needed is true. We don't know that until we're
6856 expanding the prologue. */
6857 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6859 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6860 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6861 to be treated as aligned, so generate them here. */
6862 r0
= gen_reg_rtx (SImode
);
6863 r1
= gen_reg_rtx (SImode
);
6864 mark_reg_pointer (r0
, 128);
6865 mark_reg_pointer (r1
, 128);
6866 gcc_assert (REGNO (r0
) == LAST_VIRTUAL_REGISTER
+ 1
6867 && REGNO (r1
) == LAST_VIRTUAL_REGISTER
+ 2);
6871 static scalar_int_mode
6872 spu_libgcc_cmp_return_mode (void)
6875 /* For SPU word mode is TI mode so it is better to use SImode
6876 for compare returns. */
6880 static scalar_int_mode
6881 spu_libgcc_shift_count_mode (void)
6883 /* For SPU word mode is TI mode so it is better to use SImode
6884 for shift counts. */
6888 /* Implement targetm.section_type_flags. */
6890 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6892 /* .toe needs to have type @nobits. */
6893 if (strcmp (name
, ".toe") == 0)
6895 /* Don't load _ea into the current address space. */
6896 if (strcmp (name
, "._ea") == 0)
6897 return SECTION_WRITE
| SECTION_DEBUG
;
6898 return default_section_type_flags (decl
, name
, reloc
);
6901 /* Implement targetm.select_section. */
6903 spu_select_section (tree decl
, int reloc
, unsigned HOST_WIDE_INT align
)
6905 /* Variables and constants defined in the __ea address space
6906 go into a special section named "._ea". */
6907 if (TREE_TYPE (decl
) != error_mark_node
6908 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) == ADDR_SPACE_EA
)
6910 /* We might get called with string constants, but get_named_section
6911 doesn't like them as they are not DECLs. Also, we need to set
6912 flags in that case. */
6914 return get_section ("._ea", SECTION_WRITE
| SECTION_DEBUG
, NULL
);
6916 return get_named_section (decl
, "._ea", reloc
);
6919 return default_elf_select_section (decl
, reloc
, align
);
6922 /* Implement targetm.unique_section. */
6924 spu_unique_section (tree decl
, int reloc
)
6926 /* We don't support unique section names in the __ea address
6928 if (TREE_TYPE (decl
) != error_mark_node
6929 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) != 0)
6932 default_unique_section (decl
, reloc
);
6935 /* Generate a constant or register which contains 2^SCALE. We assume
6936 the result is valid for MODE. Currently, MODE must be V4SFmode and
6937 SCALE must be SImode. */
6939 spu_gen_exp2 (machine_mode mode
, rtx scale
)
6941 gcc_assert (mode
== V4SFmode
);
6942 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6943 if (GET_CODE (scale
) != CONST_INT
)
6945 /* unsigned int exp = (127 + scale) << 23;
6946 __vector float m = (__vector float) spu_splats (exp); */
6947 rtx reg
= force_reg (SImode
, scale
);
6948 rtx exp
= gen_reg_rtx (SImode
);
6949 rtx mul
= gen_reg_rtx (mode
);
6950 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6951 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
6952 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
6957 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6958 unsigned char arr
[16];
6959 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
6960 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6961 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6962 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
6963 return array_to_constant (mode
, arr
);
6967 /* After reload, just change the convert into a move instruction
6968 or a dead instruction. */
6970 spu_split_convert (rtx ops
[])
6972 if (REGNO (ops
[0]) == REGNO (ops
[1]))
6973 emit_note (NOTE_INSN_DELETED
);
6976 /* Use TImode always as this might help hard reg copyprop. */
6977 rtx op0
= gen_rtx_REG (TImode
, REGNO (ops
[0]));
6978 rtx op1
= gen_rtx_REG (TImode
, REGNO (ops
[1]));
6979 emit_insn (gen_move_insn (op0
, op1
));
6984 spu_function_profiler (FILE * file
, int labelno ATTRIBUTE_UNUSED
)
6986 fprintf (file
, "# profile\n");
6987 fprintf (file
, "brsl $75, _mcount\n");
6990 /* Implement targetm.ref_may_alias_errno. */
6992 spu_ref_may_alias_errno (ao_ref
*ref
)
6994 tree base
= ao_ref_base (ref
);
6996 /* With SPU newlib, errno is defined as something like
6998 The default implementation of this target macro does not
6999 recognize such expressions, so special-code for it here. */
7001 if (TREE_CODE (base
) == VAR_DECL
7002 && !TREE_STATIC (base
)
7003 && DECL_EXTERNAL (base
)
7004 && TREE_CODE (TREE_TYPE (base
)) == RECORD_TYPE
7005 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base
)),
7006 "_impure_data") == 0
7007 /* _errno is the first member of _impure_data. */
7008 && ref
->offset
== 0)
7011 return default_ref_may_alias_errno (ref
);
7014 /* Output thunk to FILE that implements a C++ virtual function call (with
7015 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7016 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7017 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7018 relative to the resulting this pointer. */
7021 spu_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
7022 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
7027 /* Make sure unwind info is emitted for the thunk if needed. */
7028 final_start_function (emit_barrier (), file
, 1);
7030 /* Operand 0 is the target function. */
7031 op
[0] = XEXP (DECL_RTL (function
), 0);
7033 /* Operand 1 is the 'this' pointer. */
7034 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
7035 op
[1] = gen_rtx_REG (Pmode
, FIRST_ARG_REGNUM
+ 1);
7037 op
[1] = gen_rtx_REG (Pmode
, FIRST_ARG_REGNUM
);
7039 /* Operands 2/3 are the low/high halfwords of delta. */
7040 op
[2] = GEN_INT (trunc_int_for_mode (delta
, HImode
));
7041 op
[3] = GEN_INT (trunc_int_for_mode (delta
>> 16, HImode
));
7043 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7044 op
[4] = GEN_INT (trunc_int_for_mode (vcall_offset
, HImode
));
7045 op
[5] = GEN_INT (trunc_int_for_mode (vcall_offset
>> 16, HImode
));
7047 /* Operands 6/7 are temporary registers. */
7048 op
[6] = gen_rtx_REG (Pmode
, 79);
7049 op
[7] = gen_rtx_REG (Pmode
, 78);
7051 /* Add DELTA to this pointer. */
7054 if (delta
>= -0x200 && delta
< 0x200)
7055 output_asm_insn ("ai\t%1,%1,%2", op
);
7056 else if (delta
>= -0x8000 && delta
< 0x8000)
7058 output_asm_insn ("il\t%6,%2", op
);
7059 output_asm_insn ("a\t%1,%1,%6", op
);
7063 output_asm_insn ("ilhu\t%6,%3", op
);
7064 output_asm_insn ("iohl\t%6,%2", op
);
7065 output_asm_insn ("a\t%1,%1,%6", op
);
7069 /* Perform vcall adjustment. */
7072 output_asm_insn ("lqd\t%7,0(%1)", op
);
7073 output_asm_insn ("rotqby\t%7,%7,%1", op
);
7075 if (vcall_offset
>= -0x200 && vcall_offset
< 0x200)
7076 output_asm_insn ("ai\t%7,%7,%4", op
);
7077 else if (vcall_offset
>= -0x8000 && vcall_offset
< 0x8000)
7079 output_asm_insn ("il\t%6,%4", op
);
7080 output_asm_insn ("a\t%7,%7,%6", op
);
7084 output_asm_insn ("ilhu\t%6,%5", op
);
7085 output_asm_insn ("iohl\t%6,%4", op
);
7086 output_asm_insn ("a\t%7,%7,%6", op
);
7089 output_asm_insn ("lqd\t%6,0(%7)", op
);
7090 output_asm_insn ("rotqby\t%6,%6,%7", op
);
7091 output_asm_insn ("a\t%1,%1,%6", op
);
7094 /* Jump to target. */
7095 output_asm_insn ("br\t%0", op
);
7097 final_end_function ();
7100 /* Canonicalize a comparison from one we don't have to one we do have. */
7102 spu_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
7103 bool op0_preserve_value
)
7105 if (!op0_preserve_value
7106 && (*code
== LE
|| *code
== LT
|| *code
== LEU
|| *code
== LTU
))
7111 *code
= (int)swap_condition ((enum rtx_code
)*code
);
7115 /* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
7116 to perform. MEM is the memory on which to operate. VAL is the second
7117 operand of the binary operator. BEFORE and AFTER are optional locations to
7118 return the value of MEM either before of after the operation. */
7120 spu_expand_atomic_op (enum rtx_code code
, rtx mem
, rtx val
,
7121 rtx orig_before
, rtx orig_after
)
7123 machine_mode mode
= GET_MODE (mem
);
7124 rtx before
= orig_before
, after
= orig_after
;
7126 if (before
== NULL_RTX
)
7127 before
= gen_reg_rtx (mode
);
7129 emit_move_insn (before
, mem
);
7131 if (code
== MULT
) /* NAND operation */
7133 rtx x
= expand_simple_binop (mode
, AND
, before
, val
,
7134 NULL_RTX
, 1, OPTAB_LIB_WIDEN
);
7135 after
= expand_simple_unop (mode
, NOT
, x
, after
, 1);
7139 after
= expand_simple_binop (mode
, code
, before
, val
,
7140 after
, 1, OPTAB_LIB_WIDEN
);
7143 emit_move_insn (mem
, after
);
7145 if (orig_after
&& after
!= orig_after
)
7146 emit_move_insn (orig_after
, after
);
7149 /* Implement TARGET_MODES_TIEABLE_P. */
7152 spu_modes_tieable_p (machine_mode mode1
, machine_mode mode2
)
7154 return (GET_MODE_BITSIZE (mode1
) <= MAX_FIXED_MODE_SIZE
7155 && GET_MODE_BITSIZE (mode2
) <= MAX_FIXED_MODE_SIZE
);
7158 /* Table of machine attributes. */
7159 static const struct attribute_spec spu_attribute_table
[] =
7161 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7162 affects_type_identity } */
7163 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
,
7165 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
,
7167 { NULL
, 0, 0, false, false, false, NULL
, false }
7170 /* TARGET overrides. */
7173 #define TARGET_LRA_P hook_bool_void_false
7175 #undef TARGET_ADDR_SPACE_POINTER_MODE
7176 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7178 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7179 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7181 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7182 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7183 spu_addr_space_legitimate_address_p
7185 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7186 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7188 #undef TARGET_ADDR_SPACE_SUBSET_P
7189 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7191 #undef TARGET_ADDR_SPACE_CONVERT
7192 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7194 #undef TARGET_INIT_BUILTINS
7195 #define TARGET_INIT_BUILTINS spu_init_builtins
7196 #undef TARGET_BUILTIN_DECL
7197 #define TARGET_BUILTIN_DECL spu_builtin_decl
7199 #undef TARGET_EXPAND_BUILTIN
7200 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7202 #undef TARGET_UNWIND_WORD_MODE
7203 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7205 #undef TARGET_LEGITIMIZE_ADDRESS
7206 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7208 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7209 and .quad for the debugger. When it is known that the assembler is fixed,
7210 these can be removed. */
7211 #undef TARGET_ASM_UNALIGNED_SI_OP
7212 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7214 #undef TARGET_ASM_ALIGNED_DI_OP
7215 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7217 /* The .8byte directive doesn't seem to work well for a 32 bit
7219 #undef TARGET_ASM_UNALIGNED_DI_OP
7220 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7222 #undef TARGET_RTX_COSTS
7223 #define TARGET_RTX_COSTS spu_rtx_costs
7225 #undef TARGET_ADDRESS_COST
7226 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7228 #undef TARGET_SCHED_ISSUE_RATE
7229 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7231 #undef TARGET_SCHED_INIT_GLOBAL
7232 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7234 #undef TARGET_SCHED_INIT
7235 #define TARGET_SCHED_INIT spu_sched_init
7237 #undef TARGET_SCHED_VARIABLE_ISSUE
7238 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7240 #undef TARGET_SCHED_REORDER
7241 #define TARGET_SCHED_REORDER spu_sched_reorder
7243 #undef TARGET_SCHED_REORDER2
7244 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7246 #undef TARGET_SCHED_ADJUST_COST
7247 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7249 #undef TARGET_ATTRIBUTE_TABLE
7250 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7252 #undef TARGET_ASM_INTEGER
7253 #define TARGET_ASM_INTEGER spu_assemble_integer
7255 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7256 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7258 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7259 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7261 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7262 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7264 #undef TARGET_ASM_GLOBALIZE_LABEL
7265 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7267 #undef TARGET_PASS_BY_REFERENCE
7268 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7270 #undef TARGET_FUNCTION_ARG
7271 #define TARGET_FUNCTION_ARG spu_function_arg
7273 #undef TARGET_FUNCTION_ARG_ADVANCE
7274 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7276 #undef TARGET_FUNCTION_ARG_PADDING
7277 #define TARGET_FUNCTION_ARG_PADDING spu_function_arg_padding
7279 #undef TARGET_MUST_PASS_IN_STACK
7280 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7282 #undef TARGET_BUILD_BUILTIN_VA_LIST
7283 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7285 #undef TARGET_EXPAND_BUILTIN_VA_START
7286 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7288 #undef TARGET_SETUP_INCOMING_VARARGS
7289 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7291 #undef TARGET_MACHINE_DEPENDENT_REORG
7292 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7294 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7295 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7297 #undef TARGET_INIT_LIBFUNCS
7298 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7300 #undef TARGET_RETURN_IN_MEMORY
7301 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7303 #undef TARGET_ENCODE_SECTION_INFO
7304 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7306 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7307 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7309 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7310 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7312 #undef TARGET_VECTORIZE_INIT_COST
7313 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7315 #undef TARGET_VECTORIZE_ADD_STMT_COST
7316 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7318 #undef TARGET_VECTORIZE_FINISH_COST
7319 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7321 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7322 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7324 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7325 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7327 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7328 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7330 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7331 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7333 #undef TARGET_SCHED_SMS_RES_MII
7334 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7336 #undef TARGET_SECTION_TYPE_FLAGS
7337 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7339 #undef TARGET_ASM_SELECT_SECTION
7340 #define TARGET_ASM_SELECT_SECTION spu_select_section
7342 #undef TARGET_ASM_UNIQUE_SECTION
7343 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7345 #undef TARGET_LEGITIMATE_ADDRESS_P
7346 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7348 #undef TARGET_LEGITIMATE_CONSTANT_P
7349 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7351 #undef TARGET_TRAMPOLINE_INIT
7352 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7354 #undef TARGET_WARN_FUNC_RETURN
7355 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7357 #undef TARGET_OPTION_OVERRIDE
7358 #define TARGET_OPTION_OVERRIDE spu_option_override
7360 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7361 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7363 #undef TARGET_REF_MAY_ALIAS_ERRNO
7364 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7366 #undef TARGET_ASM_OUTPUT_MI_THUNK
7367 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7368 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7369 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7371 /* Variable tracking should be run after all optimizations which
7372 change order of insns. It also needs a valid CFG. */
7373 #undef TARGET_DELAY_VARTRACK
7374 #define TARGET_DELAY_VARTRACK true
7376 #undef TARGET_CANONICALIZE_COMPARISON
7377 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7379 #undef TARGET_CAN_USE_DOLOOP_P
7380 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7382 #undef TARGET_MODES_TIEABLE_P
7383 #define TARGET_MODES_TIEABLE_P spu_modes_tieable_p
7385 struct gcc_target targetm
= TARGET_INITIALIZER
;