/* Copyright (C) 2006-2013 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
#include "sched-int.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */

  /* A 16-byte type. (Implemented with V16QI_type_node) */

  /* These all correspond to intSI_type_node */

  /* These correspond to the standard types */

#define V16QI_type_node		(spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node		(spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node		(spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node		(spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node		(spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node		(spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node	(spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node	(spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node	(spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node	(spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
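/* Illustrative sketch (added for clarity, not part of the original file):
   a range entry is typically consulted when expanding a builtin, to check
   that a constant operand fits the encoding.  The helper name below is
   hypothetical, and it assumes the elided struct body provides low/high
   fields for the bounds.  */
#if 0
static void
check_builtin_range_sketch (rtx op, int range_index)
{
  struct spu_builtin_range *r = &spu_builtin_range[range_index];
  if (GET_CODE (op) == CONST_INT
      && (INTVAL (op) < r->low || INTVAL (op) > r->high))
    error ("builtin operand is out of range");
}
#endif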
/* Target specific attribute specifications. */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];

/* Prototypes and external defs. */
static int get_pipe (rtx insn);
static int spu_naked_function_p (tree func);
static int mem_is_padded_component_ref (rtx x);
static void fix_range (const char *);
static rtx spu_expand_load (rtx, rtx, rtx, int);

/* Which instruction set architecture to use. */

/* Which cpu are we tuning for. */
/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down. */
int spu_hint_dist = (8*4) - (2*4);
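/* Worked example (added for clarity): SPU instruction words are 4 bytes,
   so 8 insns are 8*4 = 32 bytes.  With the default of at most 2 nops
   available for padding, the compiler itself must see at least
   32 - 2*4 = 24 bytes (6 insns) between hint and branch; the remaining
   distance is made up with nops.  */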
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */

static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);
/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)


/* Define the structure for the machine field in struct function. */
struct GTY(()) machine_function
  /* Register to use for PIC accesses. */

/* How to allocate a 'struct machine_function'. */
static struct machine_function *
spu_init_machine_status (void)
  return ggc_alloc_cleared_machine_function ();
/* Implement TARGET_OPTION_OVERRIDE. */
spu_option_override (void)
  /* Set up function hooks. */
  init_machine_status = spu_init_machine_status;

  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default. */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue */
  if (align_functions < 8)

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level. */
  if (strcmp (&spu_arch_string[0], "cell") == 0)
    spu_arch = PROCESSOR_CELL;
  else if (strcmp (&spu_arch_string[0], "celledp") == 0)
    spu_arch = PROCESSOR_CELLEDP;
    error ("bad value (%s) for -march= switch", spu_arch_string);

  /* Determine processor to tune for. */
  if (strcmp (&spu_tune_string[0], "cell") == 0)
    spu_tune = PROCESSOR_CELL;
  else if (strcmp (&spu_tune_string[0], "celledp") == 0)
    spu_tune = PROCESSOR_CELLEDP;
    error ("bad value (%s) for -mtune= switch", spu_tune_string);

  /* Change defaults according to the processor architecture. */
  if (spu_arch == PROCESSOR_CELLEDP)
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround. */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported. */
spu_scalar_mode_supported_p (enum machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details. */
spu_vector_mode_supported_p (enum machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct. */
valid_subreg (rtx op)
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
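/* Worked examples (added for clarity): (subreg:SI (reg:HI ...)) is
   accepted because both modes are at most 4 bytes; (subreg:V4SI
   (reg:TI ...)) is accepted because both modes are 16 bytes; but
   (subreg:SI (reg:DI ...)) is rejected -- the sizes differ (4 vs. 8)
   and neither the both-at-most-4 nor the both-at-least-16 case
   applies.  */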
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset. */
adjust_operand (rtx op, HOST_WIDE_INT * start)
  enum machine_mode mode;

  /* Strip any paradoxical SUBREG. */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
	GET_MODE_BITSIZE (GET_MODE (op)) -
	GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);

  /* If it is smaller than SI, assure a SUBREG */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
    *start += 32 - op_size;

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
spu_expand_extv (rtx ops[], int unsignedp)
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address. */
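      /* Worked example (added for clarity): with MEM_ALIGN (src) == 128,
         align_mask is -128, i.e. the low 7 bits are cleared.  Extracting
         start = 24, width = 32 gives (24 & -128) == (55 & -128) == 0, so
         one quadword load suffices; extracting start = 112, width = 32
         gives 0 vs. 128, so the field crosses the boundary and two loads
         are needed.  */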
      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	  /* Alignment is sufficient for 1 load. */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	    emit_insn (gen_rotqby_ti (s0, s0, r0));
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);
	  gcc_assert (start + width <= 128);
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
  else if (GET_CODE (src) == SUBREG)
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
	emit_move_insn (s0, src);
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);

  /* Now s0 is TImode and contains the bits to extract at start. */

    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
spu_expand_insv (rtx ops[])
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));

      maskbits = (-1ll << (32 - width - start));
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
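      /* Worked example (added for clarity): for an SImode destination with
         start = 8 and width = 8, the first statement gives
         maskbits = -1ll << 16 = ...ffff0000; assuming the elided guard
         applies the second adjustment whenever start is nonzero, adding
         1ll << 24 yields 0x00ff0000 in the low 32 bits -- i.e. width
         one-bits beginning start bits below the most significant bit.  */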
      maskbits = (-1ll << (64 - width - start));
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
	unsigned char arr[16];
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));

  if (GET_CODE (ops[0]) == MEM)
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);

	  addr1 = plus_constant (Pmode, addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
      emit_move_insn (mem, dst);
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
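    /* Note added for clarity: selb performs a bitwise select, so
       gen_selb (dst, a, b, mask) leaves dst = (a & ~mask) | (b & mask);
       here the freshly shifted source bits replace exactly the field
       covered by MASK while every other bit of DST is preserved.  */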
spu_expand_block_move (rtx ops[])
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

      for (offset = 0; offset + 16 <= bytes; offset += 16)
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
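  /* Worked example (added for clarity): for a 20-byte copy the loop above
     moves one full 16-byte vector, leaving offset == 16 and 4 tail bytes.
     Assuming the elided loop body sets arr[i] = 0xff for each remaining
     byte, the selb merges the first 4 bytes from the source quadword with
     the other 12 bytes of the existing destination quadword, so memory
     past the copied region is left untouched.  */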
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
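  /* Example (added for clarity): (X >= 5) is rewritten as (X > 4) and
     (X < 5) as (X <= 4), provided the adjusted constant still fits in
     the mode of X -- that is what the trunc_int_for_mode check below
     verifies.  */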
  if (GET_CODE (op1) == CONST_INT)
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)

  /* However, if we generate an integer result, performing a reverse test
     would require an extra negation, so avoid that where possible. */
  if (GET_CODE (op1) == CONST_INT && is_set == 1)
      HOST_WIDE_INT val = INTVAL (op1) + 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)

  op_mode = GET_MODE (op0);
      if (HONOR_NANS (op_mode))
      if (HONOR_NANS (op_mode))
      comp_mode = V4SImode;
      comp_mode = V2DImode;
      if (GET_MODE (op1) == DFmode
	  && (scode != SPU_GT && scode != SPU_EQ))

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
	  || GET_MODE (op0) == HImode
	  || GET_MODE (op0) == QImode) && scode == SPU_EQ)
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = op0;
      compare_result = gen_reg_rtx (comp_mode);
      if (spu_comp_icode[index][scode] == 0)
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
      emit_insn (comp_rtx);
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
	  ior_code = optab_handler (ior_optab, comp_mode);
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  emit_insn (gen_extendqihi2 (compare_result, old_res));

	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
  else if (is_set == 2)
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
      else if (target_size < compare_size)
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
	emit_insn (gen_selb (target, op_f, op_t, select_mask));

      rtx target = operands[0];
      emit_insn (gen_rtx_SET (VOIDmode, compare_result,
			      gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
	emit_move_insn (target, compare_result);
const_double_to_hwint (rtx x)
  if (GET_MODE (x) == SFmode)
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
  else if (GET_MODE (x) == DFmode)
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = (val << 32) | (l[1] & 0xffffffff);

hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
  gcc_assert (mode == SFmode || mode == DFmode);

    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
      tv[1] = (v << 32) >> 32;
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
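/* Example (added for clarity): these two helpers are inverses on the bit
   pattern.  For SFmode, const_double_to_hwint on a CONST_DOUBLE holding
   1.0f yields 0x3f800000, and hwint_to_const_double (SFmode, 0x3f800000)
   rebuilds the same 1.0f constant.  */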
print_operand_address (FILE * file, register rtx addr)
  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);

      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
      else if (GET_CODE (offset) == CONST_INT)
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);

      output_addr_const (file, addr);
print_operand (FILE * file, rtx x, int code)
  enum machine_mode mode = GET_MODE (x);
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  if (GET_MODE (x) == VOIDmode)
    case 'L':			/* 128 bits, signed */
    case 'm':			/* 128 bits, signed */
    case 'T':			/* 128 bits, signed */
    case 't':			/* 128 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 'J':			/* 32 bits, signed */
    case 'j':			/* 32 bits, signed */
    case 's':			/* 32 bits, signed */
    case 'S':			/* 32 bits, signed */

    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	      fprintf (file, "h");
	      fprintf (file, "b");

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	      val = trunc_int_for_mode (val, HImode);
	      val = trunc_int_for_mode (val, QImode);
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
	enum immediate_class c = classify_immediate (x, mode);
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
		fprintf (file, "a");
		fprintf (file, "h");
		fprintf (file, "hu");
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	      fprintf (file, "b");
	      fprintf (file, "h");
	      fprintf (file, "w");
	      fprintf (file, "d");
	    if (xcode == CONST_VECTOR)
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
	enum immediate_class c = classify_immediate (x, mode);
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	    constant_to_array (mode, x, arr);
	    for (i = 0; i < 16; i++)
	    print_operand (file, GEN_INT (val), 0);
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	  if (GET_CODE (x) == CONST_VECTOR)
	    x = CONST_VECTOR_ELT (x, 0);
	  output_addr_const (file, x);
	    fprintf (file, "@h");

      if (xcode == CONST_INT)
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions. */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	    fprintf (file, "b");
	    fprintf (file, "h");
	    fprintf (file, "w");
	    fprintf (file, "d");

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");

    case 'b':			/* branch modifiers */
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");

    case 'i':			/* indirect call */
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	    output_address (XEXP (x, 0));

    case 'p':			/* load/store */
	  xcode = GET_CODE (x);
	  xcode = GET_CODE (x);
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	    fprintf (file, "d");

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));

      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));

	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
	output_addr_const (file, x);

      output_operand_lossage ("invalid %%xn code");
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register. */
  if (!reload_completed && !reload_in_progress)

  /* If we've already made the decision, we need to keep with it.  Once we've
     decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
     return true since the register is now live; this should not cause us to
     "switch back" to using pic_offset_table_rtx. */
  if (!cfun->machine->pic_reg)
      if (crtl->is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
	cfun->machine->pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
	cfun->machine->pic_reg = pic_offset_table_rtx;

  return cfun->machine->pic_reg;
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
spu_split_immediate (rtx * ops)
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arrhi);
	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));

	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));

      if (reload_in_progress || reload_completed)
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	  emit_move_insn (ops[0], mem);

      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	  emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	  emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	  emit_insn (gen_pic (ops[0], ops[1]));
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
      return flag_pic || c == IC_IL2s;
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving registers we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue. */
need_to_save_reg (int regno, int saving)
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || cfun->machine->pic_reg == pic_offset_table_rtx))

/* This function is only correct starting with local register
spu_saved_regs_size (void)
  int reg_save_size = 0;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
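/* Example (added for clarity): every SPU register is 128 bits wide, so
   each saved register occupies 0x10 (16) bytes; a function that must
   save three call-saved registers reserves 48 bytes here.  */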
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
  rtx reg = gen_rtx_REG (V4SImode, regno);
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));

frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
  rtx reg = gen_rtx_REG (V4SImode, regno);
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));

/* This happens after reload, so we need to expand it. */
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
  if (satisfies_constraint_K (GEN_INT (imm)))
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
/* Return nonzero if this function is known to have a null epilogue. */
direct_return (void)
  if (reload_completed)
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)

   The stack frame looks like this:

   AP ->     +-------------+
   prev SP   | back chain  |
             | reg save    |  crtl->args.pretend_args_size bytes
             | saved regs  |  spu_saved_regs_size() bytes
   FP ->     +-------------+
             | vars        |  get_frame_size() bytes
  HFP ->     +-------------+
             | args        |  crtl->outgoing_args_size bytes
   SP ->     +-------------+
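/* Worked example (added for clarity): with 32 bytes of locals, two saved
   registers (2 * 16 = 32 bytes), 48 bytes of outgoing args and no pretend
   args, total_size computed in spu_expand_prologue below is
   32 + 32 + 48 = 112 bytes, plus STACK_POINTER_OFFSET whenever a frame
   must actually be allocated.  */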
spu_expand_prologue (void)
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;

  if (flag_pic && optimize == 0 && !cfun->machine->pic_reg)
    cfun->machine->pic_reg = pic_offset_table_rtx;

  if (spu_naked_function_p (current_function_decl))

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;

      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;

  if (flag_pic && cfun->machine->pic_reg)
      rtx pic_reg = cfun->machine->pic_reg;
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));

      if (flag_stack_check)
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1) */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
      if (total_size <= 2000)
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);

      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

      if (total_size > 2000)
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);

      if (frame_pointer_needed)
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + crtl->outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;

  if (flag_stack_usage_info)
    current_function_static_stack_size = total_size;
spu_expand_epilogue (bool sibcall_p)
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);

  if (spu_naked_function_p (current_function_decl))

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
		frame_emit_load (regno, sp_reg, offset);

      frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      emit_jump_insn (gen__return ());

spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
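/* Usage examples (added for clarity): spu_const (V4SImode, 1) builds the
   vector constant {1, 1, 1, 1}; spu_const (SFmode, 0x3f800000) builds the
   float constant 1.0f, since VAL is interpreted as the bit pattern for
   floating-point modes.  */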
/* Create a MODE vector constant from 4 ints. */
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant(mode, arr);
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
  rtx prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block. */
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
		|| INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
		|| INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily. */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)

/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0 or inline asm. */
emit_nop_for_insn (rtx insn)
  /* We need to handle JUMP_TABLE_DATA separately. */
  if (JUMP_TABLE_DATA_P (insn))
      new_insn = emit_insn_after (gen_lnop(), insn);
      recog_memoized (new_insn);
      INSN_LOCATION (new_insn) = UNKNOWN_LOCATION;

  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
  INSN_LOCATION (new_insn) = INSN_LOCATION (insn);
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop. */
  rtx insn, next_insn, prev_insn, hbr_insn = 0;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */

  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
	  || INSN_CODE (insn) == CODE_FOR_hbr)
	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
		  prev_insn = emit_insn_before (gen_lnop (), insn);
		  PUT_MODE (prev_insn, GET_MODE (insn));
		  PUT_MODE (insn, TImode);
		  INSN_LOCATION (prev_insn) = INSN_LOCATION (insn);

      if (INSN_CODE (insn) == CODE_FOR_blockage)
	  if (GET_MODE (insn) == TImode)
	    PUT_MODE (next_insn, TImode);
	  next_insn = next_active_insn (insn);

      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
	  if (((addr + length) & 7) != 0)
	      emit_nop_for_insn (prev_insn);
      else if (GET_MODE (insn) == TImode
	       && ((next_insn && GET_MODE (next_insn) != TImode)
		   || get_attr_type (insn) == TYPE_MULTI0)
	       && ((addr + length) & 7) != 0)
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
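  /* Worked example (added for clarity): "length" accumulates the bytes
     added by nops so far, so addr + length is an insn's effective address.
     If a call that must issue on an even word pair would land at 0x104,
     (0x104 & 7) == 4, so a 4-byte nop is emitted after the previous insn
     and the call moves to 0x108, an 8-byte boundary, preserving dual
     issue.  */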
/* Routines for branch hints. */

spu_emit_branch_hint (rtx before, rtx branch, rtx target,
		      int distance, sbitmap blocks)
  rtx branch_label = 0;

  if (before == 0 || branch == 0 || target == 0)

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too */

  /* If we have a Basic block note, emit it after the basic block note. */
  if (NOTE_INSN_BASIC_BLOCK_P (before))
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  bitmap_set_bit (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  INSN_LOCATION (hint) = INSN_LOCATION (branch);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
	vec = XVEC (PATTERN (table), 0);
	vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;

  if (distance >= 588)
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit */
      insn = emit_insn_before (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);
  else if (distance <= 8 * 4)
      /* To guarantee at least 8 insns between the hint and branch we
      for (d = distance; d < 8 * 4; d += 4)
	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
	  recog_memoized (insn);
	  INSN_LOCATION (insn) = INSN_LOCATION (hint);

      /* Make sure any nops inserted aren't scheduled before the hint. */
      insn = emit_insn_after (gen_blockage (), hint);
      recog_memoized (insn);
      INSN_LOCATION (insn) = INSN_LOCATION (hint);

      /* Make sure any nops inserted aren't scheduled after the call. */
      if (CALL_P (branch) && distance < 8 * 4)
	  insn = emit_insn_before (gen_blockage (), branch);
	  recog_memoized (insn);
	  INSN_LOCATION (insn) = INSN_LOCATION (branch);
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
get_branch_target (rtx branch)
  if (JUMP_P (branch))
      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      if (extract_asm_operands (PATTERN (branch)) != NULL)

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)

      if (GET_CODE (src) == IF_THEN_ELSE)
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	      /* If the more probable case is not a fall through, then
	         try a branch hint. */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

  else if (CALL_P (branch))
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
      return XEXP (XEXP (call, 0), 0);
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it. */
insn_clobbers_hbr (rtx insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
      rtx parallel = PATTERN (insn);
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.
insert_hbrp_for_ilb_runout (rtx first)
  rtx insn, before_4 = 0, before_16 = 0;
  int addr = 0, length, first_addr = -1;
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
  int insert_lnop_after = 0;
  for (insn = first; insn; insn = NEXT_INSN (insn))
	if (first_addr == -1)
	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
	length = get_attr_length (insn);

	if (before_4 == 0 && addr + length >= 4 * 4)
	/* We test for 14 instructions because the first hbrp will add
	   up to 2 instructions. */
	if (before_16 == 0 && addr + length >= 14 * 4)

	if (INSN_CODE (insn) == CODE_FOR_hbr)
	    /* Make sure an hbrp is at least 2 cycles away from a hint.
	       Insert an lnop after the hbrp when necessary. */
	    if (before_4 == 0 && addr > 0)
		insert_lnop_after |= 1;
	    else if (before_4 && addr <= 4 * 4)
	      insert_lnop_after |= 1;
	    if (before_16 == 0 && addr > 10 * 4)
		insert_lnop_after |= 2;
	    else if (before_16 && addr <= 14 * 4)
	      insert_lnop_after |= 2;

	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
	    if (addr < hbrp_addr0)
	    else if (addr < hbrp_addr1)

	if (CALL_P (insn) || JUMP_P (insn))
	    if (HINTED_P (insn))

	    /* Any branch after the first 15 insns should be on an even
	       address to avoid a special case branch.  There might be
	       some nops and/or hbrps inserted, so we test after 10
	      SCHED_ON_EVEN_P (insn) = 1;

	    if (CALL_P (insn) || tablejump_p (insn, 0, 0))

	if (addr + length >= 32 * 4)
	    gcc_assert (before_4 && before_16);
	    if (hbrp_addr0 > 4 * 4)
		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
		recog_memoized (insn);
		INSN_LOCATION (insn) = INSN_LOCATION (before_4);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_4)));
		PUT_MODE (insn, GET_MODE (before_4));
		PUT_MODE (before_4, TImode);
		if (insert_lnop_after & 1)
		    insn = emit_insn_before (gen_lnop (), before_4);
		    recog_memoized (insn);
		    INSN_LOCATION (insn) = INSN_LOCATION (before_4);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID (before_4)));
		    PUT_MODE (insn, TImode);
	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
		&& hbrp_addr1 > 16 * 4)
		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
		recog_memoized (insn);
		INSN_LOCATION (insn) = INSN_LOCATION (before_16);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_16)));
		PUT_MODE (insn, GET_MODE (before_16));
		PUT_MODE (before_16, TImode);
		if (insert_lnop_after & 2)
		    insn = emit_insn_before (gen_lnop (), before_16);
		    recog_memoized (insn);
		    INSN_LOCATION (insn) = INSN_LOCATION (before_16);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID
		    PUT_MODE (insn, TImode);
      else if (BARRIER_P (insn))
2393 function and the return from a call might have been hinted, and
2394 must be handled as well. To prevent a hang we insert 2 hbrps. The
2395 first should be within 6 insns of the branch target. The second
2396 should be within 22 insns of the branch target. When determining
2397 if hbrps are necessary, we look for only 32 inline instructions,
2398 because up to 12 nops and 4 hbrps could be inserted. Similarily,
2399 when inserting new hbrps, we insert them within 4 and 16 insns of
2405 if (TARGET_SAFE_HINTS
)
2407 shorten_branches (get_insns ());
2408 /* Insert hbrp at beginning of function */
2409 insn
= next_active_insn (get_insns ());
2411 insert_hbrp_for_ilb_runout (insn
);
2412 /* Insert hbrp after hinted targets. */
2413 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2414 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2415 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
static int in_spu_reorg;

spu_var_tracking (void)
  if (flag_var_tracking)
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
  df_finish_pass (false);
/* Insert branch hints.  There are no branch optimizations after this
   pass, so it's safe to set our branch hints now. */
spu_machine_dependent_reorg (void)
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, required_dist = 0;

  if (!TARGET_BRANCH_HINTS || optimize == 0)
      /* We still do it for unoptimized code because an external
         function might have hinted a call or return. */
      compute_bb_for_insn ();
      spu_var_tracking ();
      free_bb_for_insn ();

  blocks = sbitmap_alloc (last_basic_block);
  bitmap_clear (blocks);

  compute_bb_for_insn ();

  /* (Re-)discover loops so that bb->loop_father can be used
     in the analysis below. */
  loop_optimizer_init (AVOID_CFG_MODIFICATIONS);

    (struct spu_bb_info *) xcalloc (n_basic_blocks,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths. */
  shorten_branches (get_insns ());

  for (i = n_basic_blocks - 1; i >= 0; i--)
      bb = BASIC_BLOCK (i);
      if (spu_bb_info[i].prop_jump)
	  branch = spu_bb_info[i].prop_jump;
	  branch_target = get_branch_target (branch);
	  branch_addr = INSN_ADDRESSES (INSN_UID (branch));
	  required_dist = spu_hint_dist;

      /* Search from end of a block to beginning.  In this loop, find
         jumps which need a branch and emit them only when:
         - it's an indirect branch and we're at the insn which sets
         - we're at an insn that will invalidate the hint.  e.g., a
           call, another hint insn, inline asm that clobbers $hbr, and
           some inlined operations (divmodsi4).  Don't consider jumps
           because they are only at the end of a block and are
           considered when we are deciding whether to propagate
         - we're getting too far away from the branch.  The hbr insns
           only have a signed 10 bit offset
         We go back as far as possible so the branch will be considered
         for propagation when we get to the beginning of the block. */
      for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
	      insn_addr = INSN_ADDRESSES (INSN_UID (insn));
		  && ((GET_CODE (branch_target) == REG
		       && set_of (branch_target, insn) != NULL_RTX)
		      || insn_clobbers_hbr (insn)
		      || branch_addr - insn_addr > 600))
		  rtx next = NEXT_INSN (insn);
		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
		  if (insn != BB_END (bb)
		      && branch_addr - next_addr >= required_dist)
				 "hint for %i in block %i before %i\n",
				 INSN_UID (branch), bb->index,
		      spu_emit_branch_hint (next, branch, branch_target,
					    branch_addr - next_addr, blocks);

	  /* JUMP_P will only be true at the end of a block.  When
	     branch is already set it means we've previously decided
	     to propagate a hint for that branch into this block. */
	  if (CALL_P (insn) || (JUMP_P (insn) && !branch))
	      if ((branch_target = get_branch_target (insn)))
		  branch_addr = insn_addr;
		  required_dist = spu_hint_dist;

	  if (insn == BB_HEAD (bb))

	  /* If we haven't emitted a hint for this branch yet, it might
	     be profitable to emit it in one of the predecessor blocks,
	     especially for loops. */
	  basic_block prev = 0, prop = 0, prev2 = 0;
	  int loop_exit = 0, simple_loop = 0;
	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));

	  for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	    if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	      prev = EDGE_PRED (bb, j)->src;
	      prev2 = EDGE_PRED (bb, j)->src;

	  for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	    if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	    else if (EDGE_SUCC (bb, j)->dest == bb)

	  /* If this branch is a loop exit then propagate to previous
	     fallthru block.  This catches the cases when it is a simple
	     loop or when there is an initial branch into the loop. */
	  if (prev && (loop_exit || simple_loop)
	      && bb_loop_depth (prev) <= bb_loop_depth (bb))
	  /* If there is only one adjacent predecessor.  Don't propagate
	     outside this loop. */
	  else if (prev && single_pred_p (bb)
		   && prev->loop_father == bb->loop_father)
	  /* If this is the JOIN block of a simple IF-THEN then
	     propagate the hint to the HEADER block. */
	  else if (prev && prev2
		   && EDGE_COUNT (bb->preds) == 2
		   && EDGE_COUNT (prev->preds) == 1
		   && EDGE_PRED (prev, 0)->src == prev2
		   && prev2->loop_father == bb->loop_father
		   && GET_CODE (branch_target) != REG)

	  /* Don't propagate when:
	     - this is a simple loop and the hint would be too far
	     - this is not a simple loop and there are 16 insns in
	     - the predecessor block ends in a branch that will be
	     - the predecessor block ends in an insn that invalidates
	      && (bbend = BB_END (prop))
	      && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	      (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	      && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
			 bb->index, prop->index, bb_loop_depth (bb),
			 INSN_UID (branch), loop_exit, simple_loop,
			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	      spu_bb_info[prop->index].prop_jump = branch;
	      spu_bb_info[prop->index].bb_index = i;
	  else if (branch_addr - next_addr >= required_dist)
		fprintf (dump_file, "hint for %i in block %i before %i\n",
			 INSN_UID (branch), bb->index,
			 INSN_UID (NEXT_INSN (insn)));
	      spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
				    branch_addr - next_addr, blocks);

  if (!bitmap_empty_p (blocks))
    find_many_sub_basic_blocks (blocks);

  /* We have to schedule to make sure alignment is ok. */
  FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;

  /* The hints need to be scheduled, so call it again. */
  df_finish_pass (true);

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
	/* Adjust the LABEL_REF in a hint when we have inserted a nop
	   between its branch label and the branch.  We don't move the
	   label because GCC expects it at the beginning of the block. */
	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
	rtx label_ref
= XVECEXP (unspec
, 0, 0);
2653 rtx label
= XEXP (label_ref
, 0);
2656 for (branch
= NEXT_INSN (label
);
2657 !JUMP_P (branch
) && !CALL_P (branch
);
2658 branch
= NEXT_INSN (branch
))
2659 if (NONJUMP_INSN_P (branch
))
2660 offset
+= get_attr_length (branch
);
2662 XVECEXP (unspec
, 0, 0) = plus_constant (Pmode
, label_ref
, offset
);
2665 spu_var_tracking ();
2667 loop_optimizer_finalize ();
2669 free_bb_for_insn ();
2675 /* Insn scheduling routines, primarily for dual issue. */
2677 spu_sched_issue_rate (void)
2683 uses_ls_unit(rtx insn
)
2685 rtx set
= single_set (insn
);
2687 && (GET_CODE (SET_DEST (set
)) == MEM
2688 || GET_CODE (SET_SRC (set
)) == MEM
))
2697 /* Handle inline asm */
2698 if (INSN_CODE (insn
) == -1)
2700 t
= get_attr_type (insn
);
2725 case TYPE_IPREFETCH
:
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock;
static int pipe1_clock;

static int prev_clock_var;

static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first;
static int prev_ls_clock;
2762 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2763 int max_ready ATTRIBUTE_UNUSED
)
2765 spu_sched_length
= 0;
2769 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2770 int max_ready ATTRIBUTE_UNUSED
)
2772 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2774 /* When any block might be at least 8-byte aligned, assume they
2775 will all be at least 8-byte aligned to make sure dual issue
2776 works out correctly. */
2777 spu_sched_length
= 0;
2779 spu_ls_first
= INT_MAX
;
2784 prev_clock_var
= -1;
2789 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2790 int verbose ATTRIBUTE_UNUSED
, rtx insn
, int more
)
2794 if (GET_CODE (PATTERN (insn
)) == USE
2795 || GET_CODE (PATTERN (insn
)) == CLOBBER
2796 || (len
= get_attr_length (insn
)) == 0)
2799 spu_sched_length
+= len
;
2801 /* Reset on inline asm */
2802 if (INSN_CODE (insn
) == -1)
2804 spu_ls_first
= INT_MAX
;
2809 p
= get_pipe (insn
);
2811 pipe0_clock
= clock_var
;
2813 pipe1_clock
= clock_var
;
2817 if (clock_var
- prev_ls_clock
> 1
2818 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2819 spu_ls_first
= INT_MAX
;
2820 if (uses_ls_unit (insn
))
2822 if (spu_ls_first
== INT_MAX
)
2823 spu_ls_first
= spu_sched_length
;
2824 prev_ls_clock
= clock_var
;
2827 /* The scheduler hasn't inserted the nop, but we will later on.
2828 Include those nops in spu_sched_length. */
2829 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2830 spu_sched_length
+= 4;
2831 prev_clock_var
= clock_var
;
2833 /* more is -1 when called from spu_sched_reorder for new insns
2834 that don't have INSN_PRIORITY */
2836 prev_priority
= INSN_PRIORITY (insn
);
2839 /* Always try issuing more insns. spu_sched_reorder will decide
2840 when the cycle should be advanced. */
2844 /* This function is called for both TARGET_SCHED_REORDER and
2845 TARGET_SCHED_REORDER2. */
2847 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2848 rtx
*ready
, int *nreadyp
, int clock
)
2850 int i
, nready
= *nreadyp
;
2851 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
2856 if (nready
<= 0 || pipe1_clock
>= clock
)
2859 /* Find any rtl insns that don't generate assembly insns and schedule
2861 for (i
= nready
- 1; i
>= 0; i
--)
2864 if (INSN_CODE (insn
) == -1
2865 || INSN_CODE (insn
) == CODE_FOR_blockage
2866 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
2868 ready
[i
] = ready
[nready
- 1];
2869 ready
[nready
- 1] = insn
;
2874 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
2875 for (i
= 0; i
< nready
; i
++)
2876 if (INSN_CODE (ready
[i
]) != -1)
2879 switch (get_attr_type (insn
))
2904 case TYPE_IPREFETCH
:
2910 /* In the first scheduling phase, schedule loads and stores together
2911 to increase the chance they will get merged during postreload CSE. */
2912 if (!reload_completed
&& pipe_ls
>= 0)
2914 insn
= ready
[pipe_ls
];
2915 ready
[pipe_ls
] = ready
[nready
- 1];
2916 ready
[nready
- 1] = insn
;
2920 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2924 /* When we have loads/stores in every cycle of the last 15 insns and
2925 we are about to schedule another load/store, emit an hbrp insn
2928 && spu_sched_length
- spu_ls_first
>= 4 * 15
2929 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
2931 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2932 recog_memoized (insn
);
2933 if (pipe0_clock
< clock
)
2934 PUT_MODE (insn
, TImode
);
2935 spu_sched_variable_issue (file
, verbose
, insn
, -1);
2939 /* In general, we want to emit nops to increase dual issue, but dual
2940 issue isn't faster when one of the insns could be scheduled later
2941 without effecting the critical path. We look at INSN_PRIORITY to
2942 make a good guess, but it isn't perfect so -mdual-nops=n can be
2943 used to effect it. */
2944 if (in_spu_reorg
&& spu_dual_nops
< 10)
2946 /* When we are at an even address and we are not issuing nops to
2947 improve scheduling then we need to advance the cycle. */
2948 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
2949 && (spu_dual_nops
== 0
2952 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
2955 /* When at an odd address, schedule the highest priority insn
2956 without considering pipeline. */
2957 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
2958 && (spu_dual_nops
== 0
2960 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
2965 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2966 pipe0 insn in the ready list, schedule it. */
2967 if (pipe0_clock
< clock
&& pipe_0
>= 0)
2968 schedule_i
= pipe_0
;
2970 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2971 insn to schedule. Put a pipe1 insn at the front of the ready list. */
2973 schedule_i
= pipe_1
;
2975 if (schedule_i
> -1)
2977 insn
= ready
[schedule_i
];
2978 ready
[schedule_i
] = ready
[nready
- 1];
2979 ready
[nready
- 1] = insn
;
2985 /* INSN is dependent on DEP_INSN. */
2987 spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
2991 /* The blockage pattern is used to prevent instructions from being
2992 moved across it and has no cost. */
2993 if (INSN_CODE (insn
) == CODE_FOR_blockage
2994 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
2997 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
2998 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3001 /* Make sure hbrps are spread out. */
3002 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3003 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3006 /* Make sure hints and hbrps are 2 cycles apart. */
3007 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3008 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3009 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3010 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3013 /* An hbrp has no real dependency on other insns. */
3014 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3015 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3018 /* Assuming that it is unlikely an argument register will be used in
3019 the first cycle of the called function, we reduce the cost for
3020 slightly better scheduling of dep_insn. When not hinted, the
3021 mispredicted branch would hide the cost as well. */
3024 rtx target
= get_branch_target (insn
);
3025 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3030 /* And when returning from a function, let's assume the return values
3031 are completed sooner too. */
3032 if (CALL_P (dep_insn
))
3035 /* Make sure an instruction that loads from the back chain is schedule
3036 away from the return instruction so a hint is more likely to get
3038 if (INSN_CODE (insn
) == CODE_FOR__return
3039 && (set
= single_set (dep_insn
))
3040 && GET_CODE (SET_DEST (set
)) == REG
3041 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3044 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3045 scheduler makes every insn in a block anti-dependent on the final
3046 jump_insn. We adjust here so higher cost insns will get scheduled
3048 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3049 return insn_cost (dep_insn
) - 3;
/* Create a CONST_DOUBLE from a string.  */
rtx
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}

int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
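
/* Illustrative sketch (example values chosen for this comment, not taken
   from the sources), showing which single-insn load each check selects:
     0x00001234  -> il    (fits the 16-bit signed immediate)
     0x00023456  -> ila   (fits the 18-bit unsigned immediate)
     0x00120012  -> ilh   (low halfword equals high halfword)
     0x12340000  -> ilhu  (low halfword is zero)
     0x12345678  -> none of the above; it needs an ilhu/iohl pair.  */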
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl.  */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
	     || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
3102 /* Return true if the first SIZE bytes of arr is a constant that can be
3103 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3104 represent the size and offset of the instruction to use. */
3106 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3108 int cpat
, run
, i
, start
;
3112 for (i
= 0; i
< size
&& cpat
; i
++)
3120 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3122 else if (arr
[i
] == 0)
3124 while (arr
[i
+run
] == run
&& i
+run
< 16)
3126 if (run
!= 4 && run
!= 8)
3131 if ((i
& (run
-1)) != 0)
3138 if (cpat
&& (run
|| size
< 16))
3145 *pstart
= start
== -1 ? 16-run
: start
;
3151 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3152 it into a register. MODE is only valid when OP is a CONST_INT. */
3153 static enum immediate_class
3154 classify_immediate (rtx op
, enum machine_mode mode
)
3157 unsigned char arr
[16];
3158 int i
, j
, repeated
, fsmbi
, repeat
;
3160 gcc_assert (CONSTANT_P (op
));
3162 if (GET_MODE (op
) != VOIDmode
)
3163 mode
= GET_MODE (op
);
3165 /* A V4SI const_vector with all identical symbols is ok. */
3168 && GET_CODE (op
) == CONST_VECTOR
3169 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3170 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3171 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3172 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3173 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3174 op
= CONST_VECTOR_ELT (op
, 0);
3176 switch (GET_CODE (op
))
3180 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3183 /* We can never know if the resulting address fits in 18 bits and can be
3184 loaded with ila. For now, assume the address will not overflow if
3185 the displacement is "small" (fits 'K' constraint). */
3186 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3188 rtx sym
= XEXP (XEXP (op
, 0), 0);
3189 rtx cst
= XEXP (XEXP (op
, 0), 1);
3191 if (GET_CODE (sym
) == SYMBOL_REF
3192 && GET_CODE (cst
) == CONST_INT
3193 && satisfies_constraint_K (cst
))
3202 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3203 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3204 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3210 constant_to_array (mode
, op
, arr
);
3212 /* Check that each 4-byte slot is identical. */
3214 for (i
= 4; i
< 16; i
+= 4)
3215 for (j
= 0; j
< 4; j
++)
3216 if (arr
[j
] != arr
[i
+ j
])
3221 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3222 val
= trunc_int_for_mode (val
, SImode
);
3224 if (which_immediate_load (val
) != SPU_NONE
)
3228 /* Any mode of 2 bytes or smaller can be loaded with an il
3230 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3234 for (i
= 0; i
< 16 && fsmbi
; i
++)
3235 if (arr
[i
] != 0 && repeat
== 0)
3237 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3240 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3242 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
	return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
	{
	  val = trunc_int_for_mode (val, QImode);
	  if (val >= -0x200 && val <= 0x1ff)
	    return SPU_ORBI;
	}
    }
  return SPU_NONE;
}
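
/* Illustrative sketch (example values chosen for this comment, not taken
   from the sources):
     0x000001ff  -> ori   (fits the 10-bit signed word immediate)
     0x0000abcd  -> iohl  (a 16-bit value in the low halfword)
     0x00450045  -> orhi  (repeated halfword whose HImode value fits 10 bits)
     0x23232323  -> orbi  (repeated byte whose QImode value fits 10 bits)  */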
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs.  */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
int
arith_immediate_p (rtx op, enum machine_mode mode,
		   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
/* TRUE when op is an immediate and an exact power of 2, and given that
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
   all entries must be the same.  */
int
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
{
  enum machine_mode int_mode;
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, int_mode);

  /* Currently, we only handle SFmode.  */
  gcc_assert (mode == SFmode);
  if (mode == SFmode)
    {
      int exp = (val >> 23) - 127;
      return val > 0 && (val & 0x007fffff) == 0
	     && exp >= low && exp <= high;
    }
  return 0;
}
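
/* Worked example (illustrative, not from the sources): for SFmode, 2.0f
   has the bit pattern 0x40000000, so (val >> 23) - 127 == 1 and the
   mantissa bits are zero, which is accepted when 1 lies within
   [low, high].  0.5f is 0x3f000000, giving exp == -1.  3.0f is
   0x40400000, whose mantissa bits are nonzero, so it is rejected.  */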
/* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
static int
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;
  tree decl;

  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
    {
      rtx plus = XEXP (x, 0);
      rtx op0 = XEXP (plus, 0);
      rtx op1 = XEXP (plus, 1);
      if (GET_CODE (op1) == CONST_INT)
	x = op0;
    }

  return (GET_CODE (x) == SYMBOL_REF
	  && (decl = SYMBOL_REF_DECL (x)) != 0
	  && TREE_CODE (decl) == VAR_DECL
	  && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
}
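
/* Example (an illustrative sketch, not from the sources): with the SPU
   __ea named address space, a declaration along the lines of

       extern __ea int counter;

   gives "counter" a VAR_DECL whose type carries a non-generic address
   space, so ea_symbol_ref matches its SYMBOL_REF, and also matches a
   constant offset from it such as (const (plus (symbol_ref "counter")
   (const_int 4))).  */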
3473 - any 32-bit constant (SImode, SFmode)
3474 - any constant that can be generated with fsmbi (any mode)
3475 - a 64-bit constant where the high and low bits are identical
3477 - a 128-bit constant where the four 32-bit words match. */
3479 spu_legitimate_constant_p (enum machine_mode mode
, rtx x
)
3481 if (GET_CODE (x
) == HIGH
)
3484 /* Reject any __ea qualified reference. These can't appear in
3485 instructions but must be forced to the constant pool. */
3486 if (for_each_rtx (&x
, ea_symbol_ref
, 0))
3489 /* V4SI with all identical symbols is valid. */
3492 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3493 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3494 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3495 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3496 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3497 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3499 if (GET_CODE (x
) == CONST_VECTOR
3500 && !const_vector_immediate_p (x
))
3505 /* Valid address are:
3506 - symbol_ref, label_ref, const
3508 - reg + const_int, where const_int is 16 byte aligned
3509 - reg + reg, alignment doesn't matter
3510 The alignment matters in the reg+const case because lqd and stqd
3511 ignore the 4 least significant bits of the const. We only care about
3512 16 byte modes because the expand phase will change all smaller MEM
3513 references to TImode. */
3515 spu_legitimate_address_p (enum machine_mode mode
,
3516 rtx x
, bool reg_ok_strict
)
3518 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3520 && GET_CODE (x
) == AND
3521 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3522 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3524 switch (GET_CODE (x
))
3527 return !TARGET_LARGE_MEM
;
3531 /* Keep __ea references until reload so that spu_expand_mov can see them
3533 if (ea_symbol_ref (&x
, 0))
3534 return !reload_in_progress
&& !reload_completed
;
3535 return !TARGET_LARGE_MEM
;
3538 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3546 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3551 rtx op0
= XEXP (x
, 0);
3552 rtx op1
= XEXP (x
, 1);
3553 if (GET_CODE (op0
) == SUBREG
)
3554 op0
= XEXP (op0
, 0);
3555 if (GET_CODE (op1
) == SUBREG
)
3556 op1
= XEXP (op1
, 0);
3557 if (GET_CODE (op0
) == REG
3558 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3559 && GET_CODE (op1
) == CONST_INT
3560 && ((INTVAL (op1
) >= -0x2000 && INTVAL (op1
) <= 0x1fff)
3561 /* If virtual registers are involved, the displacement will
3562 change later on anyway, so checking would be premature.
3563 Reload will make sure the final displacement after
3564 register elimination is OK. */
3565 || op0
== arg_pointer_rtx
3566 || op0
== frame_pointer_rtx
3567 || op0
== virtual_stack_vars_rtx
)
3568 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3570 if (GET_CODE (op0
) == REG
3571 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3572 && GET_CODE (op1
) == REG
3573 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
3584 /* Like spu_legitimate_address_p, except with named addresses. */
3586 spu_addr_space_legitimate_address_p (enum machine_mode mode
, rtx x
,
3587 bool reg_ok_strict
, addr_space_t as
)
3589 if (as
== ADDR_SPACE_EA
)
3590 return (REG_P (x
) && (GET_MODE (x
) == EAmode
));
3592 else if (as
!= ADDR_SPACE_GENERIC
)
3595 return spu_legitimate_address_p (mode
, x
, reg_ok_strict
);
3598 /* When the address is reg + const_int, force the const_int into a
3601 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3602 enum machine_mode mode ATTRIBUTE_UNUSED
)
3605 /* Make sure both operands are registers. */
3606 if (GET_CODE (x
) == PLUS
)
3610 if (ALIGNED_SYMBOL_REF_P (op0
))
3612 op0
= force_reg (Pmode
, op0
);
3613 mark_reg_pointer (op0
, 128);
3615 else if (GET_CODE (op0
) != REG
)
3616 op0
= force_reg (Pmode
, op0
);
3617 if (ALIGNED_SYMBOL_REF_P (op1
))
3619 op1
= force_reg (Pmode
, op1
);
3620 mark_reg_pointer (op1
, 128);
3622 else if (GET_CODE (op1
) != REG
)
3623 op1
= force_reg (Pmode
, op1
);
3624 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3629 /* Like spu_legitimate_address, except with named address support. */
3631 spu_addr_space_legitimize_address (rtx x
, rtx oldx
, enum machine_mode mode
,
3634 if (as
!= ADDR_SPACE_GENERIC
)
3637 return spu_legitimize_address (x
, oldx
, mode
);
3640 /* Reload reg + const_int for out-of-range displacements. */
3642 spu_legitimize_reload_address (rtx ad
, enum machine_mode mode ATTRIBUTE_UNUSED
,
3643 int opnum
, int type
)
3645 bool removed_and
= false;
3647 if (GET_CODE (ad
) == AND
3648 && CONST_INT_P (XEXP (ad
, 1))
3649 && INTVAL (XEXP (ad
, 1)) == (HOST_WIDE_INT
) - 16)
3655 if (GET_CODE (ad
) == PLUS
3656 && REG_P (XEXP (ad
, 0))
3657 && CONST_INT_P (XEXP (ad
, 1))
3658 && !(INTVAL (XEXP (ad
, 1)) >= -0x2000
3659 && INTVAL (XEXP (ad
, 1)) <= 0x1fff))
3661 /* Unshare the sum. */
3664 /* Reload the displacement. */
3665 push_reload (XEXP (ad
, 1), NULL_RTX
, &XEXP (ad
, 1), NULL
,
3666 BASE_REG_CLASS
, GET_MODE (ad
), VOIDmode
, 0, 0,
3667 opnum
, (enum reload_type
) type
);
3669 /* Add back AND for alignment if we stripped it. */
3671 ad
= gen_rtx_AND (GET_MODE (ad
), ad
, GEN_INT (-16));
3679 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3680 struct attribute_spec.handler. */
3682 spu_handle_fndecl_attribute (tree
* node
,
3684 tree args ATTRIBUTE_UNUSED
,
3685 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3687 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3689 warning (0, "%qE attribute only applies to functions",
3691 *no_add_attrs
= true;
3697 /* Handle the "vector" attribute. */
3699 spu_handle_vector_attribute (tree
* node
, tree name
,
3700 tree args ATTRIBUTE_UNUSED
,
3701 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3703 tree type
= *node
, result
= NULL_TREE
;
3704 enum machine_mode mode
;
3707 while (POINTER_TYPE_P (type
)
3708 || TREE_CODE (type
) == FUNCTION_TYPE
3709 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3710 type
= TREE_TYPE (type
);
3712 mode
= TYPE_MODE (type
);
3714 unsigned_p
= TYPE_UNSIGNED (type
);
3718 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3721 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3724 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3727 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3730 result
= V4SF_type_node
;
3733 result
= V2DF_type_node
;
3739 /* Propagate qualifiers attached to the element type
3740 onto the vector type. */
3741 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3742 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3744 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3747 warning (0, "%qE attribute ignored", name
);
3749 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
/* Return nonzero if FUNC is a naked function.  */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
3768 spu_initial_elimination_offset (int from
, int to
)
3770 int saved_regs_size
= spu_saved_regs_size ();
3772 if (!crtl
->is_leaf
|| crtl
->outgoing_args_size
3773 || get_frame_size () || saved_regs_size
)
3774 sp_offset
= STACK_POINTER_OFFSET
;
3775 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3776 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
3777 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3778 return get_frame_size ();
3779 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3780 return sp_offset
+ crtl
->outgoing_args_size
3781 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3782 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3783 return get_frame_size () + saved_regs_size
+ sp_offset
;
3789 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3791 enum machine_mode mode
= TYPE_MODE (type
);
3792 int byte_size
= ((mode
== BLKmode
)
3793 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3795 /* Make sure small structs are left justified in a register. */
3796 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3797 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3799 enum machine_mode smode
;
3802 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3803 int n
= byte_size
/ UNITS_PER_WORD
;
3804 v
= rtvec_alloc (nregs
);
3805 for (i
= 0; i
< n
; i
++)
3807 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3808 gen_rtx_REG (TImode
,
3811 GEN_INT (UNITS_PER_WORD
* i
));
3812 byte_size
-= UNITS_PER_WORD
;
3820 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3822 gen_rtx_EXPR_LIST (VOIDmode
,
3823 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3824 GEN_INT (UNITS_PER_WORD
* n
));
3826 return gen_rtx_PARALLEL (mode
, v
);
3828 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3832 spu_function_arg (cumulative_args_t cum_v
,
3833 enum machine_mode mode
,
3834 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3836 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3839 if (*cum
>= MAX_REGISTER_ARGS
)
3842 byte_size
= ((mode
== BLKmode
)
3843 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3845 /* The ABI does not allow parameters to be passed partially in
3846 reg and partially in stack. */
3847 if ((*cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3850 /* Make sure small structs are left justified in a register. */
3851 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3852 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3854 enum machine_mode smode
;
3858 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3859 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3860 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ *cum
),
3862 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3865 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ *cum
);
3869 spu_function_arg_advance (cumulative_args_t cum_v
, enum machine_mode mode
,
3870 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3872 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3874 *cum
+= (type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
3877 ? ((int_size_in_bytes (type
) + 15) / 16)
3880 : HARD_REGNO_NREGS (cum
, mode
));
/* Variable sized types are passed by reference.  */
static bool
spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
3895 /* Create and return the va_list datatype.
3897 On SPU, va_list is an array type equivalent to
3899 typedef struct __va_list_tag
3901 void *__args __attribute__((__aligned(16)));
3902 void *__skip __attribute__((__aligned(16)));
3906 where __args points to the arg that will be returned by the next
3907 va_arg(), and __skip points to the previous stack frame such that
3908 when __args == __skip we should advance __args by 32 bytes. */
3910 spu_build_builtin_va_list (void)
3912 tree f_args
, f_skip
, record
, type_decl
;
3915 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3918 build_decl (BUILTINS_LOCATION
,
3919 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3921 f_args
= build_decl (BUILTINS_LOCATION
,
3922 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3923 f_skip
= build_decl (BUILTINS_LOCATION
,
3924 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3926 DECL_FIELD_CONTEXT (f_args
) = record
;
3927 DECL_ALIGN (f_args
) = 128;
3928 DECL_USER_ALIGN (f_args
) = 1;
3930 DECL_FIELD_CONTEXT (f_skip
) = record
;
3931 DECL_ALIGN (f_skip
) = 128;
3932 DECL_USER_ALIGN (f_skip
) = 1;
3934 TYPE_STUB_DECL (record
) = type_decl
;
3935 TYPE_NAME (record
) = type_decl
;
3936 TYPE_FIELDS (record
) = f_args
;
3937 DECL_CHAIN (f_args
) = f_skip
;
3939 /* We know this is being padded and we want it too. It is an internal
3940 type so hide the warnings from the user. */
3942 warn_padded
= false;
3944 layout_type (record
);
3948 /* The correct type is an array type of one element. */
3949 return build_array_type (record
, build_index_type (size_zero_node
));
3952 /* Implement va_start by filling the va_list structure VALIST.
3953 NEXTARG points to the first anonymous stack argument.
3955 The following global variables are used to initialize
3956 the va_list structure:
3959 the CUMULATIVE_ARGS for this function
3961 crtl->args.arg_offset_rtx:
3962 holds the offset of the first anonymous stack argument
3963 (relative to the virtual arg pointer). */
3966 spu_va_start (tree valist
, rtx nextarg
)
3968 tree f_args
, f_skip
;
3971 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3972 f_skip
= DECL_CHAIN (f_args
);
3974 valist
= build_simple_mem_ref (valist
);
3976 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3978 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3980 /* Find the __args area. */
3981 t
= make_tree (TREE_TYPE (args
), nextarg
);
3982 if (crtl
->args
.pretend_args_size
> 0)
3983 t
= fold_build_pointer_plus_hwi (t
, -STACK_POINTER_OFFSET
);
3984 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
3985 TREE_SIDE_EFFECTS (t
) = 1;
3986 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3988 /* Find the __skip area. */
3989 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
3990 t
= fold_build_pointer_plus_hwi (t
, (crtl
->args
.pretend_args_size
3991 - STACK_POINTER_OFFSET
));
3992 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
3993 TREE_SIDE_EFFECTS (t
) = 1;
3994 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3997 /* Gimplify va_arg by updating the va_list structure
3998 VALIST as required to retrieve an argument of type
3999 TYPE, and returning that argument.
4001 ret = va_arg(VALIST, TYPE);
4003 generates code equivalent to:
4005 paddedsize = (sizeof(TYPE) + 15) & -16;
4006 if (VALIST.__args + paddedsize > VALIST.__skip
4007 && VALIST.__args <= VALIST.__skip)
4008 addr = VALIST.__skip + 32;
4010 addr = VALIST.__args;
4011 VALIST.__args = addr + paddedsize;
4012 ret = *(TYPE *)addr;
4015 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4016 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4018 tree f_args
, f_skip
;
4020 HOST_WIDE_INT size
, rsize
;
4022 bool pass_by_reference_p
;
4024 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4025 f_skip
= DECL_CHAIN (f_args
);
4027 valist
= build_simple_mem_ref (valist
);
4029 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4031 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4033 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4035 /* if an object is dynamically sized, a pointer to it is passed
4036 instead of the object itself. */
4037 pass_by_reference_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4039 if (pass_by_reference_p
)
4040 type
= build_pointer_type (type
);
4041 size
= int_size_in_bytes (type
);
4042 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4044 /* build conditional expression to calculate addr. The expression
4045 will be gimplified later. */
4046 tmp
= fold_build_pointer_plus_hwi (unshare_expr (args
), rsize
);
4047 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4048 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4049 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4050 unshare_expr (skip
)));
4052 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4053 fold_build_pointer_plus_hwi (unshare_expr (skip
), 32),
4054 unshare_expr (args
));
4056 gimplify_assign (addr
, tmp
, pre_p
);
4058 /* update VALIST.__args */
4059 tmp
= fold_build_pointer_plus_hwi (addr
, rsize
);
4060 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4062 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
4065 if (pass_by_reference_p
)
4066 addr
= build_va_arg_indirect_ref (addr
);
4068 return build_va_arg_indirect_ref (addr
);
4071 /* Save parameter registers starting with the register that corresponds
4072 to the first unnamed parameters. If the first unnamed parameter is
4073 in the stack then save no registers. Set pretend_args_size to the
4074 amount of space needed to save the registers. */
4076 spu_setup_incoming_varargs (cumulative_args_t cum
, enum machine_mode mode
,
4077 tree type
, int *pretend_size
, int no_rtl
)
4084 int ncum
= *get_cumulative_args (cum
);
4086 /* cum currently points to the last named argument, we want to
4087 start at the next argument. */
4088 spu_function_arg_advance (pack_cumulative_args (&ncum
), mode
, type
, true);
4090 offset
= -STACK_POINTER_OFFSET
;
4091 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4093 tmp
= gen_frame_mem (V4SImode
,
4094 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4096 emit_move_insn (tmp
,
4097 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4100 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
static void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}

/* This is called any time we inspect the alignment of a register for
   addresses.  */
static int
reg_aligned_for_addr (rtx x)
{
  int regno =
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
  return REGNO_POINTER_ALIGN (regno) >= 128;
}

/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1.  */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
4138 /* Return TRUE if we are certain the mem refers to a complete object
4139 which is both 16-byte aligned and padded to a 16-byte boundary. This
4140 would make it safe to store with a single instruction.
4141 We guarantee the alignment and padding for static objects by aligning
4142 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4143 FIXME: We currently cannot guarantee this for objects on the stack
4144 because assign_parm_setup_stack calls assign_stack_local with the
4145 alignment of the parameter mode and in that case the alignment never
4146 gets adjusted by LOCAL_ALIGNMENT. */
4148 store_with_one_insn_p (rtx mem
)
4150 enum machine_mode mode
= GET_MODE (mem
);
4151 rtx addr
= XEXP (mem
, 0);
4152 if (mode
== BLKmode
)
4154 if (GET_MODE_SIZE (mode
) >= 16)
4156 /* Only static objects. */
4157 if (GET_CODE (addr
) == SYMBOL_REF
)
4159 /* We use the associated declaration to make sure the access is
4160 referring to the whole object.
4161 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4162 if it is necessary. Will there be cases where one exists, and
4163 the other does not? Will there be cases where both exist, but
4164 have different types? */
4165 tree decl
= MEM_EXPR (mem
);
4167 && TREE_CODE (decl
) == VAR_DECL
4168 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4170 decl
= SYMBOL_REF_DECL (addr
);
4172 && TREE_CODE (decl
) == VAR_DECL
4173 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4179 /* Return 1 when the address is not valid for a simple load and store as
4180 required by the '_mov*' patterns. We could make this less strict
4181 for loads, but we prefer mem's to look the same so they are more
4182 likely to be merged. */
4184 address_needs_split (rtx mem
)
4186 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4187 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4188 || !(store_with_one_insn_p (mem
)
4189 || mem_is_padded_component_ref (mem
))))
static GTY(()) rtx cache_fetch;		  /* __cache_fetch function */
static GTY(()) rtx cache_fetch_dirty;	  /* __cache_fetch_dirty function */
static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */

/* MEM is known to be an __ea qualified memory access.  Emit a call to
   fetch the ppu memory to local store, and return its address in local
   store.  */
static void
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
  if (is_store)
    {
      rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
      if (!cache_fetch_dirty)
	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
      emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
			       2, ea_addr, EAmode, ndirty, SImode);
    }
  else
    {
      if (!cache_fetch)
	cache_fetch = init_one_libfunc ("__cache_fetch");
      emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
			       1, ea_addr, EAmode);
    }
}
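
/* Rough C-level picture of what the libcalls above amount to (a sketch
   for this comment only, not code from the runtime sources): a load
   behaves like

       data_addr = __cache_fetch (ea_addr);

   and a store like

       data_addr = __cache_fetch_dirty (ea_addr, access_size);

   where access_size is the size in bytes of the original __ea MEM.  */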
4223 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4224 dirty bit marking, inline.
4226 The cache control data structure is an array of
4228 struct __cache_tag_array
4230 unsigned int tag_lo[4];
4231 unsigned int tag_hi[4];
4232 void *data_pointer[4];
4234 vector unsigned short dirty_bits[4];
4238 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4242 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4243 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4244 rtx index_mask
= gen_reg_rtx (SImode
);
4245 rtx tag_arr
= gen_reg_rtx (Pmode
);
4246 rtx splat_mask
= gen_reg_rtx (TImode
);
4247 rtx splat
= gen_reg_rtx (V4SImode
);
4248 rtx splat_hi
= NULL_RTX
;
4249 rtx tag_index
= gen_reg_rtx (Pmode
);
4250 rtx block_off
= gen_reg_rtx (SImode
);
4251 rtx tag_addr
= gen_reg_rtx (Pmode
);
4252 rtx tag
= gen_reg_rtx (V4SImode
);
4253 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4254 rtx cache_tag_hi
= NULL_RTX
;
4255 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4256 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4257 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4258 rtx tag_equal_hi
= NULL_RTX
;
4259 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4260 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4261 rtx eq_index
= gen_reg_rtx (SImode
);
4262 rtx bcomp
, hit_label
, hit_ref
, cont_label
, insn
;
4264 if (spu_ea_model
!= 32)
4266 splat_hi
= gen_reg_rtx (V4SImode
);
4267 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4268 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4271 emit_move_insn (index_mask
, plus_constant (Pmode
, tag_size_sym
, -128));
4272 emit_move_insn (tag_arr
, tag_arr_sym
);
4273 v
= 0x0001020300010203LL
;
4274 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4275 ea_addr_si
= ea_addr
;
4276 if (spu_ea_model
!= 32)
4277 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4279 /* tag_index = ea_addr & (tag_array_size - 128) */
4280 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4282 /* splat ea_addr to all 4 slots. */
4283 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4284 /* Similarly for high 32 bits of ea_addr. */
4285 if (spu_ea_model
!= 32)
4286 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4288 /* block_off = ea_addr & 127 */
4289 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4291 /* tag_addr = tag_arr + tag_index */
4292 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4294 /* Read cache tags. */
4295 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4296 if (spu_ea_model
!= 32)
4297 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4298 plus_constant (Pmode
,
4301 /* tag = ea_addr & -128 */
4302 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4304 /* Read all four cache data pointers. */
4305 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4306 plus_constant (Pmode
,
4310 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4311 if (spu_ea_model
!= 32)
4313 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4314 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4317 /* At most one of the tags compare equal, so tag_equal has one
4318 32-bit slot set to all 1's, with the other slots all zero.
4319 gbb picks off low bit from each byte in the 128-bit registers,
4320 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4322 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4323 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4325 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4326 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4328 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4329 (rotating eq_index mod 16 bytes). */
4330 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4331 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4333 /* Add block offset to form final data address. */
4334 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4336 /* Check that we did hit. */
4337 hit_label
= gen_label_rtx ();
4338 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4339 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4340 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4341 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4343 /* Say that this branch is very likely to happen. */
4344 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4345 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (v
));
4347 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4348 cont_label
= gen_label_rtx ();
4349 emit_jump_insn (gen_jump (cont_label
));
4352 emit_label (hit_label
);
4357 rtx dirty_bits
= gen_reg_rtx (TImode
);
4358 rtx dirty_off
= gen_reg_rtx (SImode
);
4359 rtx dirty_128
= gen_reg_rtx (TImode
);
4360 rtx neg_block_off
= gen_reg_rtx (SImode
);
4362 /* Set up mask with one dirty bit per byte of the mem we are
4363 writing, starting from top bit. */
4365 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4366 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4371 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4373 /* Form index into cache dirty_bits. eq_index is one of
4374 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4375 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4376 offset to each of the four dirty_bits elements. */
4377 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4379 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4381 /* Rotate bit mask to proper bit. */
4382 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4383 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4384 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4386 /* Or in the new dirty bits. */
4387 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4390 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4393 emit_label (cont_label
);
4397 expand_ea_mem (rtx mem
, bool is_store
)
4400 rtx data_addr
= gen_reg_rtx (Pmode
);
4403 ea_addr
= force_reg (EAmode
, XEXP (mem
, 0));
4404 if (optimize_size
|| optimize
== 0)
4405 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4407 ea_load_store_inline (mem
, is_store
, ea_addr
, data_addr
);
4409 if (ea_alias_set
== -1)
4410 ea_alias_set
= new_alias_set ();
4412 /* We generate a new MEM RTX to refer to the copy of the data
4413 in the cache. We do not copy memory attributes (except the
4414 alignment) from the original MEM, as they may no longer apply
4415 to the cache copy. */
4416 new_mem
= gen_rtx_MEM (GET_MODE (mem
), data_addr
);
4417 set_mem_alias_set (new_mem
, ea_alias_set
);
4418 set_mem_align (new_mem
, MIN (MEM_ALIGN (mem
), 128 * 8));
4424 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
4426 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4428 /* Perform the move in the destination SUBREG's inner mode. */
4429 ops
[0] = SUBREG_REG (ops
[0]);
4430 mode
= GET_MODE (ops
[0]);
4431 ops
[1] = gen_lowpart_common (mode
, ops
[1]);
4432 gcc_assert (ops
[1]);
4435 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4437 rtx from
= SUBREG_REG (ops
[1]);
4438 enum machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4440 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4441 && GET_MODE_CLASS (imode
) == MODE_INT
4442 && subreg_lowpart_p (ops
[1]));
4444 if (GET_MODE_SIZE (imode
) < 4)
4446 if (imode
!= GET_MODE (from
))
4447 from
= gen_rtx_SUBREG (imode
, from
, 0);
4449 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4451 enum insn_code icode
= convert_optab_handler (trunc_optab
,
4453 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4456 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4460 /* At least one of the operands needs to be a register. */
4461 if ((reload_in_progress
| reload_completed
) == 0
4462 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4464 rtx temp
= force_reg (mode
, ops
[1]);
4465 emit_move_insn (ops
[0], temp
);
4468 if (reload_in_progress
|| reload_completed
)
4470 if (CONSTANT_P (ops
[1]))
4471 return spu_split_immediate (ops
);
4475 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4477 if (GET_CODE (ops
[1]) == CONST_INT
)
4479 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4480 if (val
!= INTVAL (ops
[1]))
4482 emit_move_insn (ops
[0], GEN_INT (val
));
4488 if (MEM_ADDR_SPACE (ops
[0]))
4489 ops
[0] = expand_ea_mem (ops
[0], true);
4490 return spu_split_store (ops
);
4494 if (MEM_ADDR_SPACE (ops
[1]))
4495 ops
[1] = expand_ea_mem (ops
[1], false);
4496 return spu_split_load (ops
);
static void
spu_convert_move (rtx dst, rtx src)
{
  enum machine_mode mode = GET_MODE (dst);
  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
  rtx reg;

  gcc_assert (GET_MODE (src) == TImode);
  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
  emit_insn (gen_rtx_SET (VOIDmode, reg,
	      gen_rtx_TRUNCATE (int_mode,
		gen_rtx_LSHIFTRT (TImode, src,
		  GEN_INT (int_mode == DImode ? 64 : 96)))));
  if (int_mode != mode)
    {
      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
      emit_move_insn (dst, reg);
    }
}
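
/* Worked example (illustrative): SPU scalars live in the "preferred slot"
   of a 128-bit register, bytes 0-3 for a 32-bit value.  For an SImode DST,
   int_mode is SImode, so the TImode SRC is logically shifted right by 96
   bits and truncated, which extracts exactly those four bytes.  For a
   DImode DST the shift is 64, extracting bytes 0-7.  */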
4521 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4522 the address from SRC and SRC+16. Return a REG or CONST_INT that
4523 specifies how many bytes to rotate the loaded registers, plus any
4524 extra from EXTRA_ROTQBY. The address and rotate amounts are
4525 normalized to improve merging of loads and rotate computations. */
4527 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4529 rtx addr
= XEXP (src
, 0);
4530 rtx p0
, p1
, rot
, addr0
, addr1
;
4536 if (MEM_ALIGN (src
) >= 128)
4537 /* Address is already aligned; simply perform a TImode load. */ ;
4538 else if (GET_CODE (addr
) == PLUS
)
4541 aligned reg + aligned reg => lqx
4542 aligned reg + unaligned reg => lqx, rotqby
4543 aligned reg + aligned const => lqd
4544 aligned reg + unaligned const => lqd, rotqbyi
4545 unaligned reg + aligned reg => lqx, rotqby
4546 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4547 unaligned reg + aligned const => lqd, rotqby
4548 unaligned reg + unaligned const -> not allowed by legitimate address
4550 p0
= XEXP (addr
, 0);
4551 p1
= XEXP (addr
, 1);
4552 if (!reg_aligned_for_addr (p0
))
4554 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4556 rot
= gen_reg_rtx (SImode
);
4557 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4559 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4563 && INTVAL (p1
) * BITS_PER_UNIT
4564 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4566 rot
= gen_reg_rtx (SImode
);
4567 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4572 rtx x
= gen_reg_rtx (SImode
);
4573 emit_move_insn (x
, p1
);
4574 if (!spu_arith_operand (p1
, SImode
))
4576 rot
= gen_reg_rtx (SImode
);
4577 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4578 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4586 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4588 rot_amt
= INTVAL (p1
) & 15;
4589 if (INTVAL (p1
) & -16)
4591 p1
= GEN_INT (INTVAL (p1
) & -16);
4592 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4597 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4601 else if (REG_P (addr
))
4603 if (!reg_aligned_for_addr (addr
))
4606 else if (GET_CODE (addr
) == CONST
)
4608 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4609 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4610 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4612 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4614 addr
= gen_rtx_CONST (Pmode
,
4615 gen_rtx_PLUS (Pmode
,
4616 XEXP (XEXP (addr
, 0), 0),
4617 GEN_INT (rot_amt
& -16)));
4619 addr
= XEXP (XEXP (addr
, 0), 0);
4623 rot
= gen_reg_rtx (Pmode
);
4624 emit_move_insn (rot
, addr
);
4627 else if (GET_CODE (addr
) == CONST_INT
)
4629 rot_amt
= INTVAL (addr
);
4630 addr
= GEN_INT (rot_amt
& -16);
4632 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4634 rot
= gen_reg_rtx (Pmode
);
4635 emit_move_insn (rot
, addr
);
4638 rot_amt
+= extra_rotby
;
4644 rtx x
= gen_reg_rtx (SImode
);
4645 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4649 if (!rot
&& rot_amt
)
4650 rot
= GEN_INT (rot_amt
);
4652 addr0
= copy_rtx (addr
);
4653 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4654 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4658 addr1
= plus_constant (SImode
, copy_rtx (addr
), 16);
4659 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4660 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
4667 spu_split_load (rtx
* ops
)
4669 enum machine_mode mode
= GET_MODE (ops
[0]);
4670 rtx addr
, load
, rot
;
4673 if (GET_MODE_SIZE (mode
) >= 16)
4676 addr
= XEXP (ops
[1], 0);
4677 gcc_assert (GET_CODE (addr
) != AND
);
4679 if (!address_needs_split (ops
[1]))
4681 ops
[1] = change_address (ops
[1], TImode
, addr
);
4682 load
= gen_reg_rtx (TImode
);
4683 emit_insn (gen__movti (load
, ops
[1]));
4684 spu_convert_move (ops
[0], load
);
4688 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4690 load
= gen_reg_rtx (TImode
);
4691 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4694 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4696 spu_convert_move (ops
[0], load
);
4701 spu_split_store (rtx
* ops
)
4703 enum machine_mode mode
= GET_MODE (ops
[0]);
4705 rtx addr
, p0
, p1
, p1_lo
, smem
;
4709 if (GET_MODE_SIZE (mode
) >= 16)
4712 addr
= XEXP (ops
[0], 0);
4713 gcc_assert (GET_CODE (addr
) != AND
);
4715 if (!address_needs_split (ops
[0]))
4717 reg
= gen_reg_rtx (TImode
);
4718 emit_insn (gen_spu_convert (reg
, ops
[1]));
4719 ops
[0] = change_address (ops
[0], TImode
, addr
);
4720 emit_move_insn (ops
[0], reg
);
4724 if (GET_CODE (addr
) == PLUS
)
4727 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4728 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4729 aligned reg + aligned const => lqd, c?d, shuf, stqx
4730 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4731 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4732 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4733 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4734 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4737 p0
= XEXP (addr
, 0);
4738 p1
= p1_lo
= XEXP (addr
, 1);
4739 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4741 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4742 if (reg_aligned_for_addr (p0
))
4744 p1
= GEN_INT (INTVAL (p1
) & -16);
4745 if (p1
== const0_rtx
)
4748 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4752 rtx x
= gen_reg_rtx (SImode
);
4753 emit_move_insn (x
, p1
);
4754 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4758 else if (REG_P (addr
))
4762 p1
= p1_lo
= const0_rtx
;
4767 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4768 p1
= 0; /* aform doesn't use p1 */
4770 if (ALIGNED_SYMBOL_REF_P (addr
))
4772 else if (GET_CODE (addr
) == CONST
4773 && GET_CODE (XEXP (addr
, 0)) == PLUS
4774 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4775 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4777 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4779 addr
= gen_rtx_CONST (Pmode
,
4780 gen_rtx_PLUS (Pmode
,
4781 XEXP (XEXP (addr
, 0), 0),
4782 GEN_INT (v
& -16)));
4784 addr
= XEXP (XEXP (addr
, 0), 0);
4785 p1_lo
= GEN_INT (v
& 15);
4787 else if (GET_CODE (addr
) == CONST_INT
)
4789 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4790 addr
= GEN_INT (INTVAL (addr
) & -16);
4794 p1_lo
= gen_reg_rtx (SImode
);
4795 emit_move_insn (p1_lo
, addr
);
4799 gcc_assert (aform
== 0 || aform
== 1);
4800 reg
= gen_reg_rtx (TImode
);
4802 scalar
= store_with_one_insn_p (ops
[0]);
4805 /* We could copy the flags from the ops[0] MEM to mem here,
4806 We don't because we want this load to be optimized away if
4807 possible, and copying the flags will prevent that in certain
4808 cases, e.g. consider the volatile flag. */
4810 rtx pat
= gen_reg_rtx (TImode
);
4811 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4812 set_mem_alias_set (lmem
, 0);
4813 emit_insn (gen_movti (reg
, lmem
));
4815 if (!p0
|| reg_aligned_for_addr (p0
))
4816 p0
= stack_pointer_rtx
;
4820 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4821 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4825 if (GET_CODE (ops
[1]) == REG
)
4826 emit_insn (gen_spu_convert (reg
, ops
[1]));
4827 else if (GET_CODE (ops
[1]) == SUBREG
)
4828 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
4833 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4834 emit_insn (gen_ashlti3
4835 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
4837 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4838 /* We can't use the previous alias set because the memory has changed
4839 size and can potentially overlap objects of other types. */
4840 set_mem_alias_set (smem
, 0);
4842 emit_insn (gen_movti (smem
, reg
));
4846 /* Return TRUE if X is MEM which is a struct member reference
4847 and the member can safely be loaded and stored with a single
4848 instruction because it is padded. */
4850 mem_is_padded_component_ref (rtx x)
4852 tree t = MEM_EXPR (x);
4854 if (!t || TREE_CODE (t) != COMPONENT_REF)
4856 t = TREE_OPERAND (t, 1);
4857 if (!t || TREE_CODE (t) != FIELD_DECL
4858 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4860 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4861 r = DECL_FIELD_CONTEXT (t);
4862 if (!r || TREE_CODE (r) != RECORD_TYPE)
4864 /* Make sure they are the same mode */
4865 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4867 /* If there are no following fields then the field alignment assures
4868 the structure is padded to the alignment which means this field is
4870 if (TREE_CHAIN (t) == 0)
4872 /* If the following field is also aligned then this field will be
4875 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4880 /* Parse the -mfixed-range= option string. */
4882 fix_range (const char *const_str)
4885 char *str, *dash, *comma;
4887 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4888 REG2 are either register names or register numbers. The effect
4889 of this option is to mark the registers in the range from REG1 to
4890 REG2 as ``fixed'' so they won't be used by the compiler. */
4892 i = strlen (const_str);
4893 str = (char *) alloca (i + 1);
4894 memcpy (str, const_str, i + 1);
4898 dash = strchr (str, '-');
4901 warning (0, "value of -mfixed-range must have form REG1-REG2");
4905 comma = strchr (dash + 1, ',');
4909 first = decode_reg_name (str);
4912 warning (0, "unknown register name: %s", str);
4916 last = decode_reg_name (dash + 1);
4919 warning (0, "unknown register name: %s", dash + 1);
4927 warning (0, "%s-%s is an empty range", str, dash + 1);
4931 for (i = first; i <= last; ++i)
4932 fixed_regs[i] = call_used_regs[i] = 1;
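/* Illustrative sketch (not part of spu.c): how a -mfixed-range= string is
   split into REG1-REG2 pairs, mirroring the strchr/decode loop above.  The
   register names and numbers shown are hypothetical examples.  */
#if 0
#include <stdio.h>
#include <string.h>

static void
show_fixed_ranges (char *str)
{
  while (str)
    {
      char *comma = strchr (str, ',');
      if (comma)
	*comma++ = '\0';
      char *dash = strchr (str, '-');
      if (!dash)
	{
	  printf ("malformed range: %s\n", str);
	  return;
	}
      *dash = '\0';
      /* e.g. prints "fix 80..90" for the hypothetical input "80-90".  */
      printf ("fix %s..%s\n", str, dash + 1);
      str = comma;
    }
}
#endif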
4942 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4943 can be generated using the fsmbi instruction. */
4945 fsmbi_const_p (rtx x)
4949 /* We can always choose TImode for CONST_INT because the high bits
4950 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4951 enum immediate_class c = classify_immediate (x, TImode);
4952 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4957 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4958 can be generated using the cbd, chd, cwd or cdd instruction. */
4960 cpat_const_p (rtx x, enum machine_mode mode)
4964 enum immediate_class c = classify_immediate (x, mode);
4965 return c == IC_CPAT;
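/* Illustrative sketch (not part of spu.c): fsmbi expands each of the 16 bits
   of its immediate into one byte of the result, 0x00 or 0xff, which is why a
   constant is fsmbi-able exactly when every byte is 0 or -1.  Minimal model
   of that expansion, assuming the most significant bit maps to byte 0.  */
#if 0
#include <stdint.h>

static void
fsmbi_model (uint16_t imm, uint8_t out[16])
{
  for (int i = 0; i < 16; i++)
    out[i] = (imm & (0x8000 >> i)) ? 0xff : 0x00;
}
#endif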
4971 gen_cpat_const (rtx * ops)
4973 unsigned char dst[16];
4974 int i, offset, shift, isize;
4975 if (GET_CODE (ops[3]) != CONST_INT
4976 || GET_CODE (ops[2]) != CONST_INT
4977 || (GET_CODE (ops[1]) != CONST_INT
4978 && GET_CODE (ops[1]) != REG))
4980 if (GET_CODE (ops[1]) == REG
4981 && (!REG_POINTER (ops[1])
4982 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4985 for (i = 0; i < 16; i++)
4987 isize = INTVAL (ops[3]);
4990 else if (isize == 2)
4994 offset = (INTVAL (ops[2]) +
4995 (GET_CODE (ops[1]) ==
4996 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4997 for (i = 0; i < isize; i++)
4998 dst[offset + i] = i + shift;
4999 return array_to_constant (TImode, dst);
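/* Illustrative sketch (not part of spu.c): the cbd/chd/cwd/cdd family builds
   a shuffle control word that inserts a scalar of ISIZE bytes at OFFSET into
   a quadword.  A minimal model of the resulting byte pattern, assuming the
   conventional encoding in which 0x10+i selects byte i of the unchanged
   operand and small values select bytes of the inserted scalar.  */
#if 0
#include <stdint.h>

static void
cpat_model (int offset, int isize, int shift, uint8_t pat[16])
{
  for (int i = 0; i < 16; i++)
    pat[i] = 0x10 + i;                   /* keep the original quadword byte */
  for (int i = 0; i < isize; i++)
    pat[(offset + i) & 15] = i + shift;  /* splice in the new element */
}

/* e.g. cpat_model (4, 4, 0, pat) keeps bytes 0-3 and 8-15 and routes the four
   scalar bytes into bytes 4-7, which is what a cwd at offset 4 produces.  */
#endif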
5002 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5003 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5004 than 16 bytes, the value is repeated across the rest of the array. */
5006 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
5011 memset (arr
, 0, 16);
5012 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
5013 if (GET_CODE (x
) == CONST_INT
5014 || (GET_CODE (x
) == CONST_DOUBLE
5015 && (mode
== SFmode
|| mode
== DFmode
)))
5017 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
5019 if (GET_CODE (x
) == CONST_DOUBLE
)
5020 val
= const_double_to_hwint (x
);
5023 first
= GET_MODE_SIZE (mode
) - 1;
5024 for (i
= first
; i
>= 0; i
--)
5026 arr
[i
] = val
& 0xff;
5029 /* Splat the constant across the whole array. */
5030 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
5033 j
= (j
== first
) ? 0 : j
+ 1;
5036 else if (GET_CODE (x
) == CONST_DOUBLE
)
5038 val
= CONST_DOUBLE_LOW (x
);
5039 for (i
= 15; i
>= 8; i
--)
5041 arr
[i
] = val
& 0xff;
5044 val
= CONST_DOUBLE_HIGH (x
);
5045 for (i
= 7; i
>= 0; i
--)
5047 arr
[i
] = val
& 0xff;
5051 else if (GET_CODE (x
) == CONST_VECTOR
)
5055 mode
= GET_MODE_INNER (mode
);
5056 units
= CONST_VECTOR_NUNITS (x
);
5057 for (i
= 0; i
< units
; i
++)
5059 elt
= CONST_VECTOR_ELT (x
, i
);
5060 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
5062 if (GET_CODE (elt
) == CONST_DOUBLE
)
5063 val
= const_double_to_hwint (elt
);
5066 first
= GET_MODE_SIZE (mode
) - 1;
5067 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
5069 for (j
= first
; j
>= 0; j
--)
5071 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
5081 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5082 smaller than 16 bytes, use the bytes that would represent that value
5083 in a register, e.g., for QImode return the value of arr[3]. */
5085 array_to_constant (enum machine_mode mode
, const unsigned char arr
[16])
5087 enum machine_mode inner_mode
;
5089 int units
, size
, i
, j
, k
;
5092 if (GET_MODE_CLASS (mode
) == MODE_INT
5093 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
5095 j
= GET_MODE_SIZE (mode
);
5096 i
= j
< 4 ? 4 - j
: 0;
5097 for (val
= 0; i
< j
; i
++)
5098 val
= (val
<< 8) | arr
[i
];
5099 val
= trunc_int_for_mode (val
, mode
);
5100 return GEN_INT (val
);
5106 for (i
= high
= 0; i
< 8; i
++)
5107 high
= (high
<< 8) | arr
[i
];
5108 for (i
= 8, val
= 0; i
< 16; i
++)
5109 val
= (val
<< 8) | arr
[i
];
5110 return immed_double_const (val
, high
, TImode
);
5114 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5115 val
= trunc_int_for_mode (val
, SImode
);
5116 return hwint_to_const_double (SFmode
, val
);
5120 for (i
= 0, val
= 0; i
< 8; i
++)
5121 val
= (val
<< 8) | arr
[i
];
5122 return hwint_to_const_double (DFmode
, val
);
5125 if (!VECTOR_MODE_P (mode
))
5128 units
= GET_MODE_NUNITS (mode
);
5129 size
= GET_MODE_UNIT_SIZE (mode
);
5130 inner_mode
= GET_MODE_INNER (mode
);
5131 v
= rtvec_alloc (units
);
5133 for (k
= i
= 0; i
< units
; ++i
)
5136 for (j
= 0; j
< size
; j
++, k
++)
5137 val
= (val
<< 8) | arr
[k
];
5139 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
5140 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
5142 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
5147 return gen_rtx_CONST_VECTOR (mode
, v
);
5151 reloc_diagnostic (rtx x
)
5154 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
5157 if (GET_CODE (x
) == SYMBOL_REF
)
5158 decl
= SYMBOL_REF_DECL (x
);
5159 else if (GET_CODE (x
) == CONST
5160 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5161 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
5163 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5164 if (decl
&& !DECL_P (decl
))
5167 /* The decl could be a string constant. */
5168 if (decl
&& DECL_P (decl
))
5171 /* We use last_assemble_variable_decl to get line information. It's
5172 not always going to be right and might not even be close, but will
5173 be right for the more common cases. */
5174 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
5175 loc
= DECL_SOURCE_LOCATION (decl
);
5177 loc
= DECL_SOURCE_LOCATION (last_assemble_variable_decl
);
5179 if (TARGET_WARN_RELOC
)
5181 "creating run-time relocation for %qD", decl
);
5184 "creating run-time relocation for %qD", decl
);
5188 if (TARGET_WARN_RELOC
)
5189 warning_at (input_location
, 0, "creating run-time relocation");
5191 error_at (input_location
, "creating run-time relocation");
5195 /* Hook into assemble_integer so we can generate an error for run-time
5196 relocations. The SPU ABI disallows them. */
5198 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5200 /* By default run-time relocations aren't supported, but we allow them
5201 in case users support it in their own run-time loader. And we provide
5202 a warning for those users that don't. */
5203 if ((GET_CODE (x) == SYMBOL_REF)
5204 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5205 reloc_diagnostic (x);
5207 return default_assemble_integer (x, size, aligned_p);
5211 spu_asm_globalize_label (FILE * file, const char *name)
5213 fputs ("\t.global\t", file);
5214 assemble_name (file, name);
5219 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
,
5220 int opno ATTRIBUTE_UNUSED
, int *total
,
5221 bool speed ATTRIBUTE_UNUSED
)
5223 enum machine_mode mode
= GET_MODE (x
);
5224 int cost
= COSTS_N_INSNS (2);
5226 /* Folding to a CONST_VECTOR will use extra space but there might
5227 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5228 only if it allows us to fold away multiple insns. Changing the cost
5229 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5230 because this cost will only be compared against a single insn.
5231 if (code == CONST_VECTOR)
5232 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5235 /* Use defaults for float operations. Not accurate but good enough. */
5238 *total
= COSTS_N_INSNS (13);
5243 *total
= COSTS_N_INSNS (6);
5249 if (satisfies_constraint_K (x
))
5251 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5252 *total
= COSTS_N_INSNS (1);
5254 *total
= COSTS_N_INSNS (3);
5258 *total
= COSTS_N_INSNS (3);
5263 *total
= COSTS_N_INSNS (0);
5267 *total
= COSTS_N_INSNS (5);
5271 case FLOAT_TRUNCATE
:
5273 case UNSIGNED_FLOAT
:
5276 *total
= COSTS_N_INSNS (7);
5282 *total
= COSTS_N_INSNS (9);
5289 GET_CODE (XEXP (x
, 0)) ==
5290 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5291 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5293 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5295 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5296 cost
= COSTS_N_INSNS (14);
5297 if ((val
& 0xffff) == 0)
5298 cost
= COSTS_N_INSNS (9);
5299 else if (val
> 0 && val
< 0x10000)
5300 cost
= COSTS_N_INSNS (11);
5309 *total
= COSTS_N_INSNS (20);
5316 *total
= COSTS_N_INSNS (4);
5319 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5320 *total
= COSTS_N_INSNS (0);
5322 *total
= COSTS_N_INSNS (4);
5325 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5326 if (GET_MODE_CLASS (mode
) == MODE_INT
5327 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5328 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5329 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5334 static enum machine_mode
5335 spu_unwind_word_mode (void)
5340 /* Decide whether we can make a sibling call to a function. DECL is the
5341 declaration of the function being targeted by the call and EXP is the
5342 CALL_EXPR representing the call. */
5344 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5346 return decl && !TARGET_LARGE_MEM;
5349 /* We need to correctly update the back chain pointer and the Available
5350 Stack Size (which is in the second slot of the sp register.) */
5352 spu_allocate_stack (rtx op0, rtx op1)
5355 rtx chain = gen_reg_rtx (V4SImode);
5356 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5357 rtx sp = gen_reg_rtx (V4SImode);
5358 rtx splatted = gen_reg_rtx (V4SImode);
5359 rtx pat = gen_reg_rtx (TImode);
5361 /* copy the back chain so we can save it back again. */
5362 emit_move_insn (chain, stack_bot);
5364 op1 = force_reg (SImode, op1);
5366 v = 0x1020300010203ll;
5367 emit_move_insn (pat, immed_double_const (v, v, TImode));
5368 emit_insn (gen_shufb (splatted, op1, op1, pat));
5370 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5371 emit_insn (gen_subv4si3 (sp, sp, splatted));
5373 if (flag_stack_check)
5375 rtx avail = gen_reg_rtx(SImode);
5376 rtx result = gen_reg_rtx(SImode);
5377 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5378 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5379 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5382 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5384 emit_move_insn (stack_bot, chain);
5386 emit_move_insn (op0, virtual_stack_dynamic_rtx);
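/* Illustrative sketch (not part of spu.c): the quadword stack pointer keeps
   the address in slot 0 and the Available Stack Size in slot 1, so allocating
   N bytes amounts to "splat N, subtract the whole vector".  A minimal scalar
   model of what the shufb/subv4si pair above computes; the slot layout is per
   the SPU ABI and the helper name is hypothetical.  */
#if 0
#include <stdint.h>

static void
allocate_stack_model (uint32_t sp[4], uint32_t nbytes)
{
  uint32_t splat[4] = { nbytes, nbytes, nbytes, nbytes };
  for (int i = 0; i < 4; i++)
    sp[i] -= splat[i];          /* sp[0]: address, sp[1]: available size */
}
#endif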
5390 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5392 static unsigned char arr[16] =
5393 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5394 rtx temp = gen_reg_rtx (SImode);
5395 rtx temp2 = gen_reg_rtx (SImode);
5396 rtx temp3 = gen_reg_rtx (V4SImode);
5397 rtx temp4 = gen_reg_rtx (V4SImode);
5398 rtx pat = gen_reg_rtx (TImode);
5399 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5401 /* Restore the backchain from the first word, sp from the second. */
5402 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5403 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5405 emit_move_insn (pat, array_to_constant (TImode, arr));
5407 /* Compute Available Stack Size for sp */
5408 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5409 emit_insn (gen_shufb (temp3, temp, temp, pat));
5411 /* Compute Available Stack Size for back chain */
5412 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5413 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5414 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5416 emit_insn (gen_addv4si3 (sp, sp, temp3));
5417 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5421 spu_init_libfuncs (void)
5423 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5424 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5425 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5426 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5427 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5428 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5429 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5430 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5431 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5432 set_optab_libfunc (clrsb_optab, DImode, "__clrsbdi2");
5433 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5434 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5436 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5437 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5439 set_optab_libfunc (addv_optab, SImode, "__addvsi3");
5440 set_optab_libfunc (subv_optab, SImode, "__subvsi3");
5441 set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
5442 set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
5443 set_optab_libfunc (negv_optab, SImode, "__negvsi2");
5444 set_optab_libfunc (absv_optab, SImode, "__absvsi2");
5445 set_optab_libfunc (addv_optab, DImode, "__addvdi3");
5446 set_optab_libfunc (subv_optab, DImode, "__subvdi3");
5447 set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
5448 set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
5449 set_optab_libfunc (negv_optab, DImode, "__negvdi2");
5450 set_optab_libfunc (absv_optab, DImode, "__absvdi2");
5452 set_optab_libfunc (smul_optab, TImode, "__multi3");
5453 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5454 set_optab_libfunc (smod_optab, TImode, "__modti3");
5455 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5456 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5457 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5460 /* Make a subreg, stripping any existing subreg. We could possibly just
5461 call simplify_subreg, but in this case we know what we want. */
5463 spu_gen_subreg (enum machine_mode mode, rtx x)
5465 if (GET_CODE (x) == SUBREG)
5467 if (GET_MODE (x) == mode)
5469 return gen_rtx_SUBREG (mode, x, 0);
5473 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5475 return (TYPE_MODE (type) == BLKmode
5477 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5478 || int_size_in_bytes (type) >
5479 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5482 /* Create the built-in types and functions */
5484 enum spu_function_code
5486 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5487 #include "spu-builtins.def"
5492 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5494 struct spu_builtin_description spu_builtins
[] = {
5495 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5496 {fcode, icode, name, type, params},
5497 #include "spu-builtins.def"
5501 static GTY(()) tree spu_builtin_decls
[NUM_SPU_BUILTINS
];
5503 /* Returns the spu builtin decl for CODE. */
5506 spu_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
5508 if (code
>= NUM_SPU_BUILTINS
)
5509 return error_mark_node
;
5511 return spu_builtin_decls
[code
];
5516 spu_init_builtins (void)
5518 struct spu_builtin_description
*d
;
5521 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5522 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5523 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5524 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5525 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5526 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5528 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5529 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5530 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5531 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5533 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5535 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5536 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5537 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5538 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5539 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5540 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5541 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5542 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5543 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5544 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5545 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5546 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5548 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5549 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5550 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5551 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5552 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5553 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5554 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5555 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5557 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5558 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5560 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5562 spu_builtin_types
[SPU_BTI_PTR
] =
5563 build_pointer_type (build_qualified_type
5565 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5567 /* For each builtin we build a new prototype. The tree code will make
5568 sure nodes are shared. */
5569 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5572 char name
[64]; /* build_function will make a copy. */
5578 /* Find last parm. */
5579 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5584 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5586 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5588 sprintf (name
, "__builtin_%s", d
->name
);
5589 spu_builtin_decls
[i
] =
5590 add_builtin_function (name
, p
, i
, BUILT_IN_MD
, NULL
, NULL_TREE
);
5591 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5592 TREE_READONLY (spu_builtin_decls
[i
]) = 1;
5594 /* These builtins don't throw. */
5595 TREE_NOTHROW (spu_builtin_decls
[i
]) = 1;
5600 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5602 static unsigned char arr[16] =
5603 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5605 rtx temp = gen_reg_rtx (Pmode);
5606 rtx temp2 = gen_reg_rtx (V4SImode);
5607 rtx temp3 = gen_reg_rtx (V4SImode);
5608 rtx pat = gen_reg_rtx (TImode);
5609 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5611 emit_move_insn (pat, array_to_constant (TImode, arr));
5613 /* Restore the sp. */
5614 emit_move_insn (temp, op1);
5615 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5617 /* Compute available stack size for sp. */
5618 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5619 emit_insn (gen_shufb (temp3, temp, temp, pat));
5621 emit_insn (gen_addv4si3 (sp, sp, temp3));
5622 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5632 spu_builtin_splats (rtx ops[])
5634 enum machine_mode mode = GET_MODE (ops[0]);
5635 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5637 unsigned char arr[16];
5638 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5639 emit_move_insn (ops[0], array_to_constant (mode, arr));
5643 rtx reg = gen_reg_rtx (TImode);
5645 if (GET_CODE (ops[1]) != REG
5646 && GET_CODE (ops[1]) != SUBREG)
5647 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5653 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5659 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5664 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5669 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5675 emit_move_insn (reg, shuf);
5676 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
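/* Illustrative sketch (not part of spu.c): splatting is a single shufb whose
   control constant repeats the byte indices of the scalar's preferred slot
   (bytes 0-7, 0-3, 2-3 or 3 depending on element size).  Minimal model of
   shufb restricted to the "select byte N of source A" encodings used here.  */
#if 0
#include <stdint.h>

static void
shufb_model (const uint8_t a[16], const uint8_t pattern[16], uint8_t out[16])
{
  for (int i = 0; i < 16; i++)
    out[i] = a[pattern[i] & 0x0f];   /* both shufb sources are the same reg */
}

/* e.g. the V4SI pattern 00 01 02 03 repeated four times copies the preferred
   word (bytes 0-3) of the scalar into every word slot of the result.  */
#endif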
5681 spu_builtin_extract (rtx ops
[])
5683 enum machine_mode mode
;
5686 mode
= GET_MODE (ops
[1]);
5688 if (GET_CODE (ops
[2]) == CONST_INT
)
5693 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5696 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5699 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5702 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5705 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5708 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5716 from
= spu_gen_subreg (TImode
, ops
[1]);
5717 rot
= gen_reg_rtx (TImode
);
5718 tmp
= gen_reg_rtx (SImode
);
5723 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5726 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5727 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5731 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5735 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5740 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5742 emit_insn (gen_spu_convert (ops
[0], rot
));
5746 spu_builtin_insert (rtx ops[])
5748 enum machine_mode mode = GET_MODE (ops[0]);
5749 enum machine_mode imode = GET_MODE_INNER (mode);
5750 rtx mask = gen_reg_rtx (TImode);
5753 if (GET_CODE (ops[3]) == CONST_INT)
5754 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5757 offset = gen_reg_rtx (SImode);
5758 emit_insn (gen_mulsi3
5759 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5762 (mask, stack_pointer_rtx, offset,
5763 GEN_INT (GET_MODE_SIZE (imode))));
5764 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5768 spu_builtin_promote (rtx ops
[])
5770 enum machine_mode mode
, imode
;
5771 rtx rot
, from
, offset
;
5774 mode
= GET_MODE (ops
[0]);
5775 imode
= GET_MODE_INNER (mode
);
5777 from
= gen_reg_rtx (TImode
);
5778 rot
= spu_gen_subreg (TImode
, ops
[0]);
5780 emit_insn (gen_spu_convert (from
, ops
[1]));
5782 if (GET_CODE (ops
[2]) == CONST_INT
)
5784 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5785 if (GET_MODE_SIZE (imode
) < 4)
5786 pos
+= 4 - GET_MODE_SIZE (imode
);
5787 offset
= GEN_INT (pos
& 15);
5791 offset
= gen_reg_rtx (SImode
);
5795 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5798 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5799 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5803 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5804 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5808 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5814 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
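/* Illustrative sketch (not part of spu.c): spu_promote moves element N of a
   vector into the scalar's preferred slot by rotating the quadword by a byte
   count that depends on the element size.  Standalone model of the offset
   computed above for the constant-index case.  */
#if 0
static int
promote_rotate_bytes (int elt_size, int n)
{
  int pos = -elt_size * n;          /* bring element N toward byte 0 ...     */
  if (elt_size < 4)
    pos += 4 - elt_size;            /* ... then land it in the preferred
                                       slot, which ends at byte 3            */
  return pos & 15;                  /* rotqby uses the count modulo 16       */
}
#endif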
5818 spu_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
5820 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5821 rtx shuf
= gen_reg_rtx (V4SImode
);
5822 rtx insn
= gen_reg_rtx (V4SImode
);
5827 fnaddr
= force_reg (SImode
, fnaddr
);
5828 cxt
= force_reg (SImode
, cxt
);
5830 if (TARGET_LARGE_MEM
)
5832 rtx rotl
= gen_reg_rtx (V4SImode
);
5833 rtx mask
= gen_reg_rtx (V4SImode
);
5834 rtx bi
= gen_reg_rtx (SImode
);
5835 static unsigned char const shufa
[16] = {
5836 2, 3, 0, 1, 18, 19, 16, 17,
5837 0, 1, 2, 3, 16, 17, 18, 19
5839 static unsigned char const insna
[16] = {
5841 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5843 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5846 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5847 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5849 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5850 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5851 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5852 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5854 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5855 emit_move_insn (mem
, insn
);
5857 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5858 mem
= adjust_address (m_tramp
, Pmode
, 16);
5859 emit_move_insn (mem
, bi
);
5863 rtx scxt
= gen_reg_rtx (SImode
);
5864 rtx sfnaddr
= gen_reg_rtx (SImode
);
5865 static unsigned char const insna
[16] = {
5866 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5872 shufc
= gen_reg_rtx (TImode
);
5873 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5875 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5876 fits 18 bits and the last 4 are zeros. This will be true if
5877 the stack pointer is initialized to 0x3fff0 at program start,
5878 otherwise the ila instruction will be garbage. */
5880 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5881 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5883 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5884 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5885 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5887 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5888 emit_move_insn (mem
, insn
);
5890 emit_insn (gen_sync ());
5894 spu_warn_func_return (tree decl)
5896 /* Naked functions are implemented entirely in assembly, including the
5897 return sequence, so suppress warnings about this. */
5898 return !spu_naked_function_p (decl);
5902 spu_expand_sign_extend (rtx ops
[])
5904 unsigned char arr
[16];
5905 rtx pat
= gen_reg_rtx (TImode
);
5908 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5909 if (GET_MODE (ops
[1]) == QImode
)
5911 sign
= gen_reg_rtx (HImode
);
5912 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5913 for (i
= 0; i
< 16; i
++)
5919 for (i
= 0; i
< 16; i
++)
5921 switch (GET_MODE (ops
[1]))
5924 sign
= gen_reg_rtx (SImode
);
5925 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5927 arr
[last
- 1] = 0x02;
5930 sign
= gen_reg_rtx (SImode
);
5931 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5932 for (i
= 0; i
< 4; i
++)
5933 arr
[last
- i
] = 3 - i
;
5936 sign
= gen_reg_rtx (SImode
);
5937 c
= gen_reg_rtx (SImode
);
5938 emit_insn (gen_spu_convert (c
, ops
[1]));
5939 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5940 for (i
= 0; i
< 8; i
++)
5941 arr
[last
- i
] = 7 - i
;
5947 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5948 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
5951 /* expand vector initialization. If there are any constant parts,
5952 load constant parts first. Then load any non-constant parts. */
5954 spu_expand_vector_init (rtx target
, rtx vals
)
5956 enum machine_mode mode
= GET_MODE (target
);
5957 int n_elts
= GET_MODE_NUNITS (mode
);
5959 bool all_same
= true;
5960 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5963 first
= XVECEXP (vals
, 0, 0);
5964 for (i
= 0; i
< n_elts
; ++i
)
5966 x
= XVECEXP (vals
, 0, i
);
5967 if (!(CONST_INT_P (x
)
5968 || GET_CODE (x
) == CONST_DOUBLE
5969 || GET_CODE (x
) == CONST_FIXED
))
5973 if (first_constant
== NULL_RTX
)
5976 if (i
> 0 && !rtx_equal_p (x
, first
))
5980 /* if all elements are the same, use splats to repeat elements */
5983 if (!CONSTANT_P (first
)
5984 && !register_operand (first
, GET_MODE (x
)))
5985 first
= force_reg (GET_MODE (first
), first
);
5986 emit_insn (gen_spu_splats (target
, first
));
5990 /* load constant parts */
5991 if (n_var
!= n_elts
)
5995 emit_move_insn (target
,
5996 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
6000 rtx constant_parts_rtx
= copy_rtx (vals
);
6002 gcc_assert (first_constant
!= NULL_RTX
);
6003 /* fill empty slots with the first constant, this increases
6004 our chance of using splats in the recursive call below. */
6005 for (i
= 0; i
< n_elts
; ++i
)
6007 x
= XVECEXP (constant_parts_rtx
, 0, i
);
6008 if (!(CONST_INT_P (x
)
6009 || GET_CODE (x
) == CONST_DOUBLE
6010 || GET_CODE (x
) == CONST_FIXED
))
6011 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
6014 spu_expand_vector_init (target
, constant_parts_rtx
);
6018 /* load variable parts */
6021 rtx insert_operands
[4];
6023 insert_operands
[0] = target
;
6024 insert_operands
[2] = target
;
6025 for (i
= 0; i
< n_elts
; ++i
)
6027 x
= XVECEXP (vals
, 0, i
);
6028 if (!(CONST_INT_P (x
)
6029 || GET_CODE (x
) == CONST_DOUBLE
6030 || GET_CODE (x
) == CONST_FIXED
))
6032 if (!register_operand (x
, GET_MODE (x
)))
6033 x
= force_reg (GET_MODE (x
), x
);
6034 insert_operands
[1] = x
;
6035 insert_operands
[3] = GEN_INT (i
);
6036 spu_builtin_insert (insert_operands
);
6042 /* Return insn index for the vector compare instruction for given CODE,
6043 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6046 get_vec_cmp_insn (enum rtx_code code
,
6047 enum machine_mode dest_mode
,
6048 enum machine_mode op_mode
)
6054 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6055 return CODE_FOR_ceq_v16qi
;
6056 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6057 return CODE_FOR_ceq_v8hi
;
6058 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6059 return CODE_FOR_ceq_v4si
;
6060 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6061 return CODE_FOR_ceq_v4sf
;
6062 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6063 return CODE_FOR_ceq_v2df
;
6066 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6067 return CODE_FOR_cgt_v16qi
;
6068 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6069 return CODE_FOR_cgt_v8hi
;
6070 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6071 return CODE_FOR_cgt_v4si
;
6072 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6073 return CODE_FOR_cgt_v4sf
;
6074 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6075 return CODE_FOR_cgt_v2df
;
6078 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6079 return CODE_FOR_clgt_v16qi
;
6080 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6081 return CODE_FOR_clgt_v8hi
;
6082 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6083 return CODE_FOR_clgt_v4si
;
6091 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6092 DMODE is expected destination mode. This is a recursive function. */
6095 spu_emit_vector_compare (enum rtx_code rcode
,
6097 enum machine_mode dmode
)
6101 enum machine_mode dest_mode
;
6102 enum machine_mode op_mode
= GET_MODE (op1
);
6104 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
6106 /* Floating point vector compare instructions uses destination V4SImode.
6107 Double floating point vector compare instructions uses destination V2DImode.
6108 Move destination to appropriate mode later. */
6109 if (dmode
== V4SFmode
)
6110 dest_mode
= V4SImode
;
6111 else if (dmode
== V2DFmode
)
6112 dest_mode
= V2DImode
;
6116 mask
= gen_reg_rtx (dest_mode
);
6117 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6119 if (vec_cmp_insn
== -1)
6121 bool swap_operands
= false;
6122 bool try_again
= false;
6127 swap_operands
= true;
6132 swap_operands
= true;
6142 /* Treat A != B as ~(A==B). */
6144 enum rtx_code rev_code
;
6145 enum insn_code nor_code
;
6148 rev_code
= reverse_condition_maybe_unordered (rcode
);
6149 rev_mask
= spu_emit_vector_compare (rev_code
, op0
, op1
, dest_mode
);
6151 nor_code
= optab_handler (one_cmpl_optab
, dest_mode
);
6152 gcc_assert (nor_code
!= CODE_FOR_nothing
);
6153 emit_insn (GEN_FCN (nor_code
) (mask
, rev_mask
));
6154 if (dmode
!= dest_mode
)
6156 rtx temp
= gen_reg_rtx (dest_mode
);
6157 convert_move (temp
, mask
, 0);
6167 /* Try GT/GTU/LT/LTU OR EQ */
6170 enum insn_code ior_code
;
6171 enum rtx_code new_code
;
6175 case GE
: new_code
= GT
; break;
6176 case GEU
: new_code
= GTU
; break;
6177 case LE
: new_code
= LT
; break;
6178 case LEU
: new_code
= LTU
; break;
6183 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
6184 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6186 ior_code
= optab_handler (ior_optab
, dest_mode
);
6187 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6188 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
6189 if (dmode
!= dest_mode
)
6191 rtx temp
= gen_reg_rtx (dest_mode
);
6192 convert_move (temp
, mask
, 0);
6202 enum insn_code ior_code
;
6204 lt_rtx
= spu_emit_vector_compare (LT
, op0
, op1
, dest_mode
);
6205 gt_rtx
= spu_emit_vector_compare (GT
, op0
, op1
, dest_mode
);
6207 ior_code
= optab_handler (ior_optab
, dest_mode
);
6208 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6209 emit_insn (GEN_FCN (ior_code
) (mask
, lt_rtx
, gt_rtx
));
6210 if (dmode
!= dest_mode
)
6212 rtx temp
= gen_reg_rtx (dest_mode
);
6213 convert_move (temp
, mask
, 0);
6220 /* Implement as (A==A) & (B==B) */
6223 enum insn_code and_code
;
6225 a_rtx
= spu_emit_vector_compare (EQ
, op0
, op0
, dest_mode
);
6226 b_rtx
= spu_emit_vector_compare (EQ
, op1
, op1
, dest_mode
);
6228 and_code
= optab_handler (and_optab
, dest_mode
);
6229 gcc_assert (and_code
!= CODE_FOR_nothing
);
6230 emit_insn (GEN_FCN (and_code
) (mask
, a_rtx
, b_rtx
));
6231 if (dmode
!= dest_mode
)
6233 rtx temp
= gen_reg_rtx (dest_mode
);
6234 convert_move (temp
, mask
, 0);
6244 /* You only get two chances. */
6246 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6248 gcc_assert (vec_cmp_insn
!= -1);
6259 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
6260 if (dmode
!= dest_mode
)
6262 rtx temp
= gen_reg_rtx (dest_mode
);
6263 convert_move (temp
, mask
, 0);
6270 /* Emit vector conditional expression.
6271 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6272 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6275 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
6276 rtx cond
, rtx cc_op0
, rtx cc_op1
)
6278 enum machine_mode dest_mode
= GET_MODE (dest
);
6279 enum rtx_code rcode
= GET_CODE (cond
);
6282 /* Get the vector mask for the given relational operations. */
6283 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
6285 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
6291 spu_force_reg (enum machine_mode mode, rtx op)
6294 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6296 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6297 || GET_MODE (op) == BLKmode)
6298 return force_reg (mode, convert_to_mode (mode, op, 0));
6302 r = force_reg (GET_MODE (op), op);
6303 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6305 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6310 x = gen_reg_rtx (mode);
6311 emit_insn (gen_spu_convert (x, r));
6316 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6318 HOST_WIDE_INT v
= 0;
6320 /* Check the range of immediate operands. */
6321 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6323 int range
= p
- SPU_BTI_7
;
6325 if (!CONSTANT_P (op
))
6326 error ("%s expects an integer literal in the range [%d, %d]",
6328 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6330 if (GET_CODE (op
) == CONST
6331 && (GET_CODE (XEXP (op
, 0)) == PLUS
6332 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6334 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6335 op
= XEXP (XEXP (op
, 0), 0);
6337 else if (GET_CODE (op
) == CONST_INT
)
6339 else if (GET_CODE (op
) == CONST_VECTOR
6340 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6341 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6343 /* The default for v is 0 which is valid in every range. */
6344 if (v
< spu_builtin_range
[range
].low
6345 || v
> spu_builtin_range
[range
].high
)
6346 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6348 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6357 /* This is only used in lqa, and stqa. Even though the insns
6358 encode 16 bits of the address (all but the 2 least
6359 significant), only 14 bits are used because it is masked to
6360 be 16 byte aligned. */
6364 /* This is used for lqr and stqr. */
6371 if (GET_CODE (op
) == LABEL_REF
6372 || (GET_CODE (op
) == SYMBOL_REF
6373 && SYMBOL_REF_FUNCTION_P (op
))
6374 || (v
& ((1 << lsbits
) - 1)) != 0)
6375 warning (0, "%d least significant bits of %s are ignored", lsbits
,
6382 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6383 rtx target
, rtx ops
[])
6385 enum insn_code icode
= (enum insn_code
) d
->icode
;
6388 /* Expand the arguments into rtl. */
6390 if (d
->parm
[0] != SPU_BTI_VOID
)
6393 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6395 tree arg
= CALL_EXPR_ARG (exp
, a
);
6398 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6401 gcc_assert (i
== insn_data
[icode
].n_generator_args
);
6406 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6407 tree exp
, rtx target
)
6411 enum insn_code icode
= (enum insn_code
) d
->icode
;
6412 enum machine_mode mode
, tmode
;
6417 /* Set up ops[] with values from arglist. */
6418 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6420 /* Handle the target operand which must be operand 0. */
6422 if (d
->parm
[0] != SPU_BTI_VOID
)
6425 /* We prefer the mode specified for the match_operand otherwise
6426 use the mode from the builtin function prototype. */
6427 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6428 if (tmode
== VOIDmode
)
6429 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6431 /* Try to use target because not using it can lead to extra copies
6432 and when we are using all of the registers extra copies leads
6434 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6437 target
= ops
[0] = gen_reg_rtx (tmode
);
6439 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6445 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6447 enum machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6452 arg
= CALL_EXPR_ARG (exp
, 0);
6453 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
6454 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6455 addr
= memory_address (mode
, op
);
6458 op
= gen_reg_rtx (GET_MODE (addr
));
6459 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6460 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6461 op
= gen_rtx_MEM (mode
, op
);
6463 pat
= GEN_FCN (icode
) (target
, op
);
6470 /* Ignore align_hint, but still expand its args in case they have side effects. */
6472 if (icode
== CODE_FOR_spu_align_hint
)
6475 /* Handle the rest of the operands. */
6476 for (p
= 1; i
< n_operands
; i
++, p
++)
6478 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6479 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6481 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6483 /* mode can be VOIDmode here for labels */
6485 /* For specific intrinsics with an immediate operand, e.g.,
6486 si_ai(), we sometimes need to convert the scalar argument to a
6487 vector argument by splatting the scalar. */
6488 if (VECTOR_MODE_P (mode
)
6489 && (GET_CODE (ops
[i
]) == CONST_INT
6490 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6491 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6493 if (GET_CODE (ops
[i
]) == CONST_INT
)
6494 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6497 rtx reg
= gen_reg_rtx (mode
);
6498 enum machine_mode imode
= GET_MODE_INNER (mode
);
6499 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6500 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6501 if (imode
!= GET_MODE (ops
[i
]))
6502 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6503 TYPE_UNSIGNED (spu_builtin_types
6505 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6510 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6512 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6513 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6519 pat
= GEN_FCN (icode
) (0);
6522 pat
= GEN_FCN (icode
) (ops
[0]);
6525 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6528 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6531 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6534 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6537 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6546 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6547 emit_call_insn (pat
);
6548 else if (d
->type
== B_JUMP
)
6550 emit_jump_insn (pat
);
6556 return_type
= spu_builtin_types
[d
->parm
[0]];
6557 if (d
->parm
[0] != SPU_BTI_VOID
6558 && GET_MODE (target
) != TYPE_MODE (return_type
))
6560 /* target is the return value. It should always be the mode of
6561 the builtin function prototype. */
6562 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6569 spu_expand_builtin (tree exp,
6571 rtx subtarget ATTRIBUTE_UNUSED,
6572 enum machine_mode mode ATTRIBUTE_UNUSED,
6573 int ignore ATTRIBUTE_UNUSED)
6575 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6576 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6577 struct spu_builtin_description *d;
6579 if (fcode < NUM_SPU_BUILTINS)
6581 d = &spu_builtins[fcode];
6583 return spu_expand_builtin_1 (d, exp, target);
6588 /* Implement targetm.vectorize.builtin_mask_for_load. */
6590 spu_builtin_mask_for_load (void)
6592 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6595 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6597 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6599 int misalign ATTRIBUTE_UNUSED
)
6603 switch (type_of_cost
)
6611 case cond_branch_not_taken
:
6613 case vec_promote_demote
:
6620 /* Load + rotate. */
6623 case unaligned_load
:
6626 case cond_branch_taken
:
6630 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6631 return elements
/ 2 + 1;
6638 /* Implement targetm.vectorize.init_cost. */
6641 spu_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
6643 unsigned *cost
= XNEWVEC (unsigned, 3);
6644 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
6648 /* Implement targetm.vectorize.add_stmt_cost. */
6651 spu_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6652 struct _stmt_vec_info
*stmt_info
, int misalign
,
6653 enum vect_cost_model_location where
)
6655 unsigned *cost
= (unsigned *) data
;
6656 unsigned retval
= 0;
6658 if (flag_vect_cost_model
)
6660 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6661 int stmt_cost
= spu_builtin_vectorization_cost (kind
, vectype
, misalign
);
6663 /* Statements in an inner loop relative to the loop being
6664 vectorized are weighted more heavily. The value here is
6665 arbitrary and could potentially be improved with analysis. */
6666 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6667 count
*= 50; /* FIXME. */
6669 retval
= (unsigned) (count
* stmt_cost
);
6670 cost
[where
] += retval
;
6676 /* Implement targetm.vectorize.finish_cost. */
6679 spu_finish_cost (void *data
, unsigned *prologue_cost
,
6680 unsigned *body_cost
, unsigned *epilogue_cost
)
6682 unsigned *cost
= (unsigned *) data
;
6683 *prologue_cost
= cost
[vect_prologue
];
6684 *body_cost
= cost
[vect_body
];
6685 *epilogue_cost
= cost
[vect_epilogue
];
6688 /* Implement targetm.vectorize.destroy_cost_data. */
6691 spu_destroy_cost_data (void *data
)
6696 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6697 after applying N number of iterations. This routine does not determine
6698 how may iterations are required to reach desired alignment. */
6701 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6706 /* All other types are naturally aligned. */
6710 /* Return the appropriate mode for a named address pointer. */
6711 static enum machine_mode
6712 spu_addr_space_pointer_mode (addr_space_t addrspace
)
6716 case ADDR_SPACE_GENERIC
:
6725 /* Return the appropriate mode for a named address address. */
6726 static enum machine_mode
6727 spu_addr_space_address_mode (addr_space_t addrspace
)
6731 case ADDR_SPACE_GENERIC
:
6740 /* Determine if one named address space is a subset of another. */
6743 spu_addr_space_subset_p (addr_space_t subset
, addr_space_t superset
)
6745 gcc_assert (subset
== ADDR_SPACE_GENERIC
|| subset
== ADDR_SPACE_EA
);
6746 gcc_assert (superset
== ADDR_SPACE_GENERIC
|| superset
== ADDR_SPACE_EA
);
6748 if (subset
== superset
)
6751 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6752 being subsets but instead as disjoint address spaces. */
6753 else if (!TARGET_ADDRESS_SPACE_CONVERSION
)
6757 return (subset
== ADDR_SPACE_GENERIC
&& superset
== ADDR_SPACE_EA
);
6760 /* Convert from one address space to another. */
6762 spu_addr_space_convert (rtx op
, tree from_type
, tree to_type
)
6764 addr_space_t from_as
= TYPE_ADDR_SPACE (TREE_TYPE (from_type
));
6765 addr_space_t to_as
= TYPE_ADDR_SPACE (TREE_TYPE (to_type
));
6767 gcc_assert (from_as
== ADDR_SPACE_GENERIC
|| from_as
== ADDR_SPACE_EA
);
6768 gcc_assert (to_as
== ADDR_SPACE_GENERIC
|| to_as
== ADDR_SPACE_EA
);
6770 if (to_as
== ADDR_SPACE_GENERIC
&& from_as
== ADDR_SPACE_EA
)
6774 ls
= gen_const_mem (DImode
,
6775 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6776 set_mem_align (ls
, 128);
6778 result
= gen_reg_rtx (Pmode
);
6779 ls
= force_reg (Pmode
, convert_modes (Pmode
, DImode
, ls
, 1));
6780 op
= force_reg (Pmode
, convert_modes (Pmode
, EAmode
, op
, 1));
6781 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6782 ls
, const0_rtx
, Pmode
, 1);
6784 emit_insn (gen_subsi3 (result
, op
, ls
));
6789 else if (to_as
== ADDR_SPACE_EA
&& from_as
== ADDR_SPACE_GENERIC
)
6793 ls
= gen_const_mem (DImode
,
6794 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6795 set_mem_align (ls
, 128);
6797 result
= gen_reg_rtx (EAmode
);
6798 ls
= force_reg (EAmode
, convert_modes (EAmode
, DImode
, ls
, 1));
6799 op
= force_reg (Pmode
, op
);
6800 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6801 ls
, const0_rtx
, EAmode
, 1);
6802 op
= force_reg (EAmode
, convert_modes (EAmode
, Pmode
, op
, 1));
6804 if (EAmode
== SImode
)
6805 emit_insn (gen_addsi3 (result
, op
, ls
));
6807 emit_insn (gen_adddi3 (result
, op
, ls
));
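/* Illustrative sketch (not part of spu.c): converting between the generic
   (local-store) and __ea address spaces is pointer arithmetic against the
   runtime symbol __ea_local_store, with NULL mapped to NULL via the
   conditional move above.  Standalone model, assuming 64-bit __ea pointers.  */
#if 0
#include <stdint.h>

static uint64_t
ls_to_ea_model (uint32_t ls_ptr, uint64_t ea_local_store)
{
  return ls_ptr ? ea_local_store + ls_ptr : 0;            /* generic -> __ea */
}

static uint32_t
ea_to_ls_model (uint64_t ea_ptr, uint64_t ea_local_store)
{
  return ea_ptr ? (uint32_t) (ea_ptr - ea_local_store) : 0;  /* __ea -> generic */
}
#endif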
6817 /* Count the total number of instructions in each pipe and return the
6818 maximum, which is used as the Minimum Iteration Interval (MII)
6819 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6820 -2 are instructions that can go in pipe0 or pipe1. */
6822 spu_sms_res_mii (struct ddg
*g
)
6825 unsigned t
[4] = {0, 0, 0, 0};
6827 for (i
= 0; i
< g
->num_nodes
; i
++)
6829 rtx insn
= g
->nodes
[i
].insn
;
6830 int p
= get_pipe (insn
) + 2;
6832 gcc_assert (p
>= 0);
6836 if (dump_file
&& INSN_P (insn
))
6837 fprintf (dump_file
, "i%d %s %d %d\n",
6839 insn_data
[INSN_CODE(insn
)].name
,
6843 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
6845 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
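/* Illustrative sketch (not part of spu.c): the modulo-scheduler MII returned
   above is the larger of (a) half the instructions that need an issue slot,
   rounded up, and (b) the occupancy of the busier pipe.  Standalone
   restatement of that MAX expression; t0 counts dual-issue-capable insns,
   t2/t3 count pipe0/pipe1 insns.  */
#if 0
static unsigned
spu_mii_model (unsigned t0, unsigned t2, unsigned t3)
{
  unsigned per_pipe = t2 > t3 ? t2 : t3;
  unsigned issue_bound = (t0 + t2 + t3 + 1) / 2;
  return issue_bound > per_pipe ? issue_bound : per_pipe;
}
#endif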
6850 spu_init_expanders (void)
6855 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6856 frame_pointer_needed is true. We don't know that until we're
6857 expanding the prologue. */
6858 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6860 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6861 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6862 to be treated as aligned, so generate them here. */
6863 r0 = gen_reg_rtx (SImode);
6864 r1 = gen_reg_rtx (SImode);
6865 mark_reg_pointer (r0, 128);
6866 mark_reg_pointer (r1, 128);
6867 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6868 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6872 static enum machine_mode
6873 spu_libgcc_cmp_return_mode (void)
6876 /* For SPU word mode is TI mode so it is better to use SImode
6877 for compare returns. */
6881 static enum machine_mode
6882 spu_libgcc_shift_count_mode (void)
6884 /* For SPU word mode is TI mode so it is better to use SImode
6885 for shift counts. */
6889 /* Implement targetm.section_type_flags. */
6891 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6893 /* .toe needs to have type @nobits. */
6894 if (strcmp (name
, ".toe") == 0)
6896 /* Don't load _ea into the current address space. */
6897 if (strcmp (name
, "._ea") == 0)
6898 return SECTION_WRITE
| SECTION_DEBUG
;
6899 return default_section_type_flags (decl
, name
, reloc
);
6902 /* Implement targetm.select_section. */
6904 spu_select_section (tree decl
, int reloc
, unsigned HOST_WIDE_INT align
)
6906 /* Variables and constants defined in the __ea address space
6907 go into a special section named "._ea". */
6908 if (TREE_TYPE (decl
) != error_mark_node
6909 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) == ADDR_SPACE_EA
)
6911 /* We might get called with string constants, but get_named_section
6912 doesn't like them as they are not DECLs. Also, we need to set
6913 flags in that case. */
6915 return get_section ("._ea", SECTION_WRITE
| SECTION_DEBUG
, NULL
);
6917 return get_named_section (decl
, "._ea", reloc
);
6920 return default_elf_select_section (decl
, reloc
, align
);
6923 /* Implement targetm.unique_section. */
6925 spu_unique_section (tree decl
, int reloc
)
6927 /* We don't support unique section names in the __ea address
6929 if (TREE_TYPE (decl
) != error_mark_node
6930 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) != 0)
6933 default_unique_section (decl
, reloc
);
6936 /* Generate a constant or register which contains 2^SCALE. We assume
6937 the result is valid for MODE. Currently, MODE must be V4SFmode and
6938 SCALE must be SImode. */
6940 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6942 gcc_assert (mode == V4SFmode);
6943 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6944 if (GET_CODE (scale) != CONST_INT)
6946 /* unsigned int exp = (127 + scale) << 23;
6947 __vector float m = (__vector float) spu_splats (exp); */
6948 rtx reg = force_reg (SImode, scale);
6949 rtx exp = gen_reg_rtx (SImode);
6950 rtx mul = gen_reg_rtx (mode);
6951 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6952 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6953 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6958 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6959 unsigned char arr[16];
6960 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6961 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6962 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6963 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6964 return array_to_constant (mode, arr);
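/* Illustrative sketch (not part of spu.c): for single precision, 2^scale is
   the float whose bit pattern is (127 + scale) << 23, i.e. biased exponent
   only, zero mantissa.  Standalone check of the encoding built above (valid
   while 127 + scale stays in 1..254, i.e. a normal number).  */
#if 0
#include <stdint.h>
#include <string.h>

static float
exp2_model (int scale)
{
  uint32_t bits = (uint32_t) (127 + scale) << 23;
  float f;
  memcpy (&f, &bits, sizeof f);    /* reinterpret the IEEE-754 bit pattern */
  return f;                        /* == 2.0f raised to the power 'scale'  */
}
#endif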
6968 /* After reload, just change the convert into a move instruction
6969 or a dead instruction. */
6971 spu_split_convert (rtx ops[])
6973 if (REGNO (ops[0]) == REGNO (ops[1]))
6974 emit_note (NOTE_INSN_DELETED);
6977 /* Use TImode always as this might help hard reg copyprop. */
6978 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6979 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6980 emit_insn (gen_move_insn (op0, op1));
6985 spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
6987 fprintf (file, "# profile\n");
6988 fprintf (file, "brsl $75, _mcount\n");
6991 /* Implement targetm.ref_may_alias_errno. */
6993 spu_ref_may_alias_errno (ao_ref *ref)
6995 tree base = ao_ref_base (ref);
6997 /* With SPU newlib, errno is defined as something like
6999 The default implementation of this target macro does not
7000 recognize such expressions, so special-code for it here. */
7002 if (TREE_CODE (base) == VAR_DECL
7003 && !TREE_STATIC (base)
7004 && DECL_EXTERNAL (base)
7005 && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
7006 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
7007 "_impure_data") == 0
7008 /* _errno is the first member of _impure_data. */
7009 && ref->offset == 0)
7012 return default_ref_may_alias_errno (ref);
/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
   stored at VCALL_OFFSET in the vtable whose address is located at offset 0
   relative to the resulting this pointer.  */

static void
spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                     tree function)
{
  rtx op[8];

  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
  else
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);

  /* Operands 2/3 are the low/high halfwords of delta.  */
  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));

  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));

  /* Operands 6/7 are temporary registers.  */
  op[6] = gen_rtx_REG (Pmode, 79);
  op[7] = gen_rtx_REG (Pmode, 78);

  /* Add DELTA to this pointer.  */
  if (delta)
    {
      if (delta >= -0x200 && delta < 0x200)
        output_asm_insn ("ai\t%1,%1,%2", op);
      else if (delta >= -0x8000 && delta < 0x8000)
        {
          output_asm_insn ("il\t%6,%2", op);
          output_asm_insn ("a\t%1,%1,%6", op);
        }
      else
        {
          output_asm_insn ("ilhu\t%6,%3", op);
          output_asm_insn ("iohl\t%6,%2", op);
          output_asm_insn ("a\t%1,%1,%6", op);
        }
    }

  /* Perform vcall adjustment.  */
  if (vcall_offset)
    {
      output_asm_insn ("lqd\t%7,0(%1)", op);
      output_asm_insn ("rotqby\t%7,%7,%1", op);

      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
        output_asm_insn ("ai\t%7,%7,%4", op);
      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
        {
          output_asm_insn ("il\t%6,%4", op);
          output_asm_insn ("a\t%7,%7,%6", op);
        }
      else
        {
          output_asm_insn ("ilhu\t%6,%5", op);
          output_asm_insn ("iohl\t%6,%4", op);
          output_asm_insn ("a\t%7,%7,%6", op);
        }

      output_asm_insn ("lqd\t%6,0(%7)", op);
      output_asm_insn ("rotqby\t%6,%6,%7", op);
      output_asm_insn ("a\t%1,%1,%6", op);
    }

  /* Jump to target.  */
  output_asm_insn ("br\t%0", op);

  final_end_function ();
}
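/* Illustrative output (added; assumes a non-aggregate return value and
   FIRST_ARG_REGNUM == 3, so the this pointer is in $3): for DELTA == 16
   and VCALL_OFFSET == 0 the code above emits just

       ai      $3,$3,16
       br      <function>

   Larger adjustments first build the constant in the temporary registers
   $79/$78 with il or ilhu/iohl before adding it in.  */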
/* Canonicalize a comparison from one we don't have to one we do have.  */
static void
spu_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
                             bool op0_preserve_value)
{
  if (!op0_preserve_value
      && (*code == LE || *code == LT || *code == LEU || *code == LTU))
    {
      rtx tem = *op0;
      *op0 = *op1;
      *op1 = tem;
      *code = (int) swap_condition ((enum rtx_code) *code);
    }
}
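/* Example (added commentary): with OP0_PRESERVE_VALUE false, a request
   for LT (a, b) is rewritten as GT (b, a) by swapping the operands and
   calling swap_condition, since the SPU compare instructions provide the
   "greater than" forms (cgt, clgt) but no "less than" forms.  */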
/*  Table of machine attributes.  */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute,
    false },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute,
    false },
  { NULL,             0, 0, false, false, false, NULL, false }
};
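/* Usage illustration (added; not part of the original sources): entries in
   this table are spelled in user code as, e.g.,

       void handler (void) __attribute__ ((naked));

   "naked" takes no arguments and requires a function declaration
   (decl_req), while "spu_vector" applies to types instead (type_req).  */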
/*  TARGET overrides.  */

#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL
#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
struct gcc_target targetm = TARGET_INITIALIZER;