1 /* Copyright (C) 2006-2014 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
31 #include "stringpool.h"
32 #include "stor-layout.h"
46 #include "dominance.h"
52 #include "cfgcleanup.h"
53 #include "basic-block.h"
54 #include "diagnostic-core.h"
58 #include "target-def.h"
59 #include "langhooks.h"
61 #include "sched-int.h"
63 #include "hash-table.h"
64 #include "tree-ssa-alias.h"
65 #include "internal-fn.h"
66 #include "gimple-fold.h"
68 #include "gimple-expr.h"
72 #include "tm-constrs.h"
82 /* Builtin types, data and prototypes. */
84 enum spu_builtin_type_index
86 SPU_BTI_END_OF_PARAMS
,
88 /* We create new type nodes for these. */
100 /* A 16-byte type. (Implemented with V16QI_type_node) */
103 /* These all correspond to intSI_type_node */
117 /* These correspond to the standard types */
137 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
138 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
139 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
140 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
141 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
142 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
143 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
144 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
145 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
146 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
148 static GTY(()) tree spu_builtin_types
[SPU_BTI_MAX
];
150 struct spu_builtin_range
155 static struct spu_builtin_range spu_builtin_range
[] = {
156 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
157 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
158 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
159 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
160 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
161 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
162 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
163 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
164 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
165 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
166 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
167 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
171 /* Target specific attribute specifications. */
172 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
174 /* Prototypes and external defs. */
175 static int get_pipe (rtx_insn
*insn
);
176 static int spu_naked_function_p (tree func
);
177 static int mem_is_padded_component_ref (rtx x
);
178 static void fix_range (const char *);
179 static rtx
spu_expand_load (rtx
, rtx
, rtx
, int);
181 /* Which instruction set architecture to use. */
183 /* Which cpu are we tuning for. */
186 /* The hardware requires 8 insns between a hint and the branch it
187 effects. This variable describes how many rtl instructions the
188 compiler needs to see before inserting a hint, and then the compiler
189 will insert enough nops to make it at least 8 insns. The default is
190 for the compiler to allow up to 2 nops be emitted. The nops are
191 inserted in pairs, so we round down. */
192 int spu_hint_dist
= (8*4) - (2*4);
207 IC_POOL
, /* constant pool */
208 IC_IL1
, /* one il* instruction */
209 IC_IL2
, /* both ilhu and iohl instructions */
210 IC_IL1s
, /* one il* instruction */
211 IC_IL2s
, /* both ilhu and iohl instructions */
212 IC_FSMBI
, /* the fsmbi instruction */
213 IC_CPAT
, /* one of the c*d instructions */
214 IC_FSMBI2
/* fsmbi plus 1 other instruction */
217 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
218 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
219 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
220 static enum immediate_class
classify_immediate (rtx op
,
223 /* Pointer mode for __ea references. */
224 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
227 /* Define the structure for the machine field in struct function. */
228 struct GTY(()) machine_function
230 /* Register to use for PIC accesses. */
234 /* How to allocate a 'struct machine_function'. */
235 static struct machine_function
*
236 spu_init_machine_status (void)
238 return ggc_cleared_alloc
<machine_function
> ();
241 /* Implement TARGET_OPTION_OVERRIDE. */
243 spu_option_override (void)
245 /* Set up function hooks. */
246 init_machine_status
= spu_init_machine_status
;
248 /* Small loops will be unpeeled at -O3. For SPU it is more important
249 to keep code small by default. */
250 if (!flag_unroll_loops
&& !flag_peel_loops
)
251 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES
, 4,
252 global_options
.x_param_values
,
253 global_options_set
.x_param_values
);
255 flag_omit_frame_pointer
= 1;
257 /* Functions must be 8 byte aligned so we correctly handle dual issue */
258 if (align_functions
< 8)
261 spu_hint_dist
= 8*4 - spu_max_nops
*4;
262 if (spu_hint_dist
< 0)
265 if (spu_fixed_range_string
)
266 fix_range (spu_fixed_range_string
);
268 /* Determine processor architectural level. */
271 if (strcmp (&spu_arch_string
[0], "cell") == 0)
272 spu_arch
= PROCESSOR_CELL
;
273 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
274 spu_arch
= PROCESSOR_CELLEDP
;
276 error ("bad value (%s) for -march= switch", spu_arch_string
);
279 /* Determine processor to tune for. */
282 if (strcmp (&spu_tune_string
[0], "cell") == 0)
283 spu_tune
= PROCESSOR_CELL
;
284 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
285 spu_tune
= PROCESSOR_CELLEDP
;
287 error ("bad value (%s) for -mtune= switch", spu_tune_string
);
290 /* Change defaults according to the processor architecture. */
291 if (spu_arch
== PROCESSOR_CELLEDP
)
293 /* If no command line option has been otherwise specified, change
294 the default to -mno-safe-hints on celledp -- only the original
295 Cell/B.E. processors require this workaround. */
296 if (!(target_flags_explicit
& MASK_SAFE_HINTS
))
297 target_flags
&= ~MASK_SAFE_HINTS
;
300 REAL_MODE_FORMAT (SFmode
) = &spu_single_format
;
303 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
304 struct attribute_spec.handler. */
306 /* True if MODE is valid for the target. By "valid", we mean able to
307 be manipulated in non-trivial ways. In particular, this means all
308 the arithmetic is supported. */
310 spu_scalar_mode_supported_p (machine_mode mode
)
328 /* Similarly for vector modes. "Supported" here is less strict. At
329 least some operations are supported; need to check optabs or builtins
330 for further details. */
332 spu_vector_mode_supported_p (machine_mode mode
)
349 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
350 least significant bytes of the outer mode. This function returns
351 TRUE for the SUBREG's where this is correct. */
353 valid_subreg (rtx op
)
355 machine_mode om
= GET_MODE (op
);
356 machine_mode im
= GET_MODE (SUBREG_REG (op
));
357 return om
!= VOIDmode
&& im
!= VOIDmode
358 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
359 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4)
360 || (GET_MODE_SIZE (im
) >= 16 && GET_MODE_SIZE (om
) >= 16));
363 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
364 and adjust the start offset. */
366 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
370 /* Strip any paradoxical SUBREG. */
371 if (GET_CODE (op
) == SUBREG
372 && (GET_MODE_BITSIZE (GET_MODE (op
))
373 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)))))
377 GET_MODE_BITSIZE (GET_MODE (op
)) -
378 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
379 op
= SUBREG_REG (op
);
381 /* If it is smaller than SI, assure a SUBREG */
382 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
386 *start
+= 32 - op_size
;
389 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
390 mode
= mode_for_size (op_size
, MODE_INT
, 0);
391 if (mode
!= GET_MODE (op
))
392 op
= gen_rtx_SUBREG (mode
, op
, 0);
397 spu_expand_extv (rtx ops
[], int unsignedp
)
399 rtx dst
= ops
[0], src
= ops
[1];
400 HOST_WIDE_INT width
= INTVAL (ops
[2]);
401 HOST_WIDE_INT start
= INTVAL (ops
[3]);
402 HOST_WIDE_INT align_mask
;
403 rtx s0
, s1
, mask
, r0
;
405 gcc_assert (REG_P (dst
) && GET_MODE (dst
) == TImode
);
409 /* First, determine if we need 1 TImode load or 2. We need only 1
410 if the bits being extracted do not cross the alignment boundary
411 as determined by the MEM and its address. */
413 align_mask
= -MEM_ALIGN (src
);
414 if ((start
& align_mask
) == ((start
+ width
- 1) & align_mask
))
416 /* Alignment is sufficient for 1 load. */
417 s0
= gen_reg_rtx (TImode
);
418 r0
= spu_expand_load (s0
, 0, src
, start
/ 8);
421 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
426 s0
= gen_reg_rtx (TImode
);
427 s1
= gen_reg_rtx (TImode
);
428 r0
= spu_expand_load (s0
, s1
, src
, start
/ 8);
431 gcc_assert (start
+ width
<= 128);
434 rtx r1
= gen_reg_rtx (SImode
);
435 mask
= gen_reg_rtx (TImode
);
436 emit_move_insn (mask
, GEN_INT (-1));
437 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
438 emit_insn (gen_rotqby_ti (s1
, s1
, r0
));
439 if (GET_CODE (r0
) == CONST_INT
)
440 r1
= GEN_INT (INTVAL (r0
) & 15);
442 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (15)));
443 emit_insn (gen_shlqby_ti (mask
, mask
, r1
));
444 emit_insn (gen_selb (s0
, s1
, s0
, mask
));
449 else if (GET_CODE (src
) == SUBREG
)
451 rtx r
= SUBREG_REG (src
);
452 gcc_assert (REG_P (r
) && SCALAR_INT_MODE_P (GET_MODE (r
)));
453 s0
= gen_reg_rtx (TImode
);
454 if (GET_MODE_SIZE (GET_MODE (r
)) < GET_MODE_SIZE (TImode
))
455 emit_insn (gen_rtx_SET (VOIDmode
, s0
, gen_rtx_ZERO_EXTEND (TImode
, r
)));
457 emit_move_insn (s0
, src
);
461 gcc_assert (REG_P (src
) && GET_MODE (src
) == TImode
);
462 s0
= gen_reg_rtx (TImode
);
463 emit_move_insn (s0
, src
);
466 /* Now s0 is TImode and contains the bits to extract at start. */
469 emit_insn (gen_rotlti3 (s0
, s0
, GEN_INT (start
)));
472 s0
= expand_shift (RSHIFT_EXPR
, TImode
, s0
, 128 - width
, s0
, unsignedp
);
474 emit_move_insn (dst
, s0
);
478 spu_expand_insv (rtx ops
[])
480 HOST_WIDE_INT width
= INTVAL (ops
[1]);
481 HOST_WIDE_INT start
= INTVAL (ops
[2]);
482 HOST_WIDE_INT maskbits
;
483 machine_mode dst_mode
;
484 rtx dst
= ops
[0], src
= ops
[3];
491 if (GET_CODE (ops
[0]) == MEM
)
492 dst
= gen_reg_rtx (TImode
);
494 dst
= adjust_operand (dst
, &start
);
495 dst_mode
= GET_MODE (dst
);
496 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
498 if (CONSTANT_P (src
))
501 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
502 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
504 src
= adjust_operand (src
, 0);
506 mask
= gen_reg_rtx (dst_mode
);
507 shift_reg
= gen_reg_rtx (dst_mode
);
508 shift
= dst_size
- start
- width
;
510 /* It's not safe to use subreg here because the compiler assumes
511 that the SUBREG_REG is right justified in the SUBREG. */
512 convert_move (shift_reg
, src
, 1);
519 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
522 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
525 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
537 maskbits
= (-1ll << (32 - width
- start
));
539 maskbits
+= (1ll << (32 - start
));
540 emit_move_insn (mask
, GEN_INT (maskbits
));
543 maskbits
= (-1ll << (64 - width
- start
));
545 maskbits
+= (1ll << (64 - start
));
546 emit_move_insn (mask
, GEN_INT (maskbits
));
550 unsigned char arr
[16];
552 memset (arr
, 0, sizeof (arr
));
553 arr
[i
] = 0xff >> (start
& 7);
554 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
556 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
557 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
563 if (GET_CODE (ops
[0]) == MEM
)
565 rtx low
= gen_reg_rtx (SImode
);
566 rtx rotl
= gen_reg_rtx (SImode
);
567 rtx mask0
= gen_reg_rtx (TImode
);
573 addr
= force_reg (Pmode
, XEXP (ops
[0], 0));
574 addr0
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
575 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
576 emit_insn (gen_negsi2 (rotl
, low
));
577 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
578 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
579 mem
= change_address (ops
[0], TImode
, addr0
);
580 set_mem_alias_set (mem
, 0);
581 emit_move_insn (dst
, mem
);
582 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
583 if (start
+ width
> MEM_ALIGN (ops
[0]))
585 rtx shl
= gen_reg_rtx (SImode
);
586 rtx mask1
= gen_reg_rtx (TImode
);
587 rtx dst1
= gen_reg_rtx (TImode
);
589 addr1
= plus_constant (Pmode
, addr
, 16);
590 addr1
= gen_rtx_AND (Pmode
, addr1
, GEN_INT (-16));
591 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
592 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
593 mem1
= change_address (ops
[0], TImode
, addr1
);
594 set_mem_alias_set (mem1
, 0);
595 emit_move_insn (dst1
, mem1
);
596 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
597 emit_move_insn (mem1
, dst1
);
599 emit_move_insn (mem
, dst
);
602 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
607 spu_expand_block_move (rtx ops
[])
609 HOST_WIDE_INT bytes
, align
, offset
;
610 rtx src
, dst
, sreg
, dreg
, target
;
612 if (GET_CODE (ops
[2]) != CONST_INT
613 || GET_CODE (ops
[3]) != CONST_INT
614 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
617 bytes
= INTVAL (ops
[2]);
618 align
= INTVAL (ops
[3]);
628 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
630 dst
= adjust_address (ops
[0], V16QImode
, offset
);
631 src
= adjust_address (ops
[1], V16QImode
, offset
);
632 emit_move_insn (dst
, src
);
637 unsigned char arr
[16] = { 0 };
638 for (i
= 0; i
< bytes
- offset
; i
++)
640 dst
= adjust_address (ops
[0], V16QImode
, offset
);
641 src
= adjust_address (ops
[1], V16QImode
, offset
);
642 mask
= gen_reg_rtx (V16QImode
);
643 sreg
= gen_reg_rtx (V16QImode
);
644 dreg
= gen_reg_rtx (V16QImode
);
645 target
= gen_reg_rtx (V16QImode
);
646 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
647 emit_move_insn (dreg
, dst
);
648 emit_move_insn (sreg
, src
);
649 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
650 emit_move_insn (dst
, target
);
658 { SPU_EQ
, SPU_GT
, SPU_GTU
};
660 int spu_comp_icode
[12][3] = {
661 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
662 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
663 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
664 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
665 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
666 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
667 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
668 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
669 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
670 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
671 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
672 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
675 /* Generate a compare for CODE. Return a brand-new rtx that represents
676 the result of the compare. GCC can figure this out too if we don't
677 provide all variations of compares, but GCC always wants to use
678 WORD_MODE, we can generate better code in most cases if we do it
681 spu_emit_branch_or_set (int is_set
, rtx cmp
, rtx operands
[])
683 int reverse_compare
= 0;
684 int reverse_test
= 0;
685 rtx compare_result
, eq_result
;
686 rtx comp_rtx
, eq_rtx
;
687 machine_mode comp_mode
;
688 machine_mode op_mode
;
689 enum spu_comp_code scode
, eq_code
;
690 enum insn_code ior_code
;
691 enum rtx_code code
= GET_CODE (cmp
);
692 rtx op0
= XEXP (cmp
, 0);
693 rtx op1
= XEXP (cmp
, 1);
697 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
698 and so on, to keep the constant in operand 1. */
699 if (GET_CODE (op1
) == CONST_INT
)
701 HOST_WIDE_INT val
= INTVAL (op1
) - 1;
702 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
726 /* However, if we generate an integer result, performing a reverse test
727 would require an extra negation, so avoid that where possible. */
728 if (GET_CODE (op1
) == CONST_INT
&& is_set
== 1)
730 HOST_WIDE_INT val
= INTVAL (op1
) + 1;
731 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
748 op_mode
= GET_MODE (op0
);
754 if (HONOR_NANS (op_mode
))
769 if (HONOR_NANS (op_mode
))
861 comp_mode
= V4SImode
;
865 comp_mode
= V2DImode
;
872 if (GET_MODE (op1
) == DFmode
873 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
876 if (is_set
== 0 && op1
== const0_rtx
877 && (GET_MODE (op0
) == SImode
878 || GET_MODE (op0
) == HImode
879 || GET_MODE (op0
) == QImode
) && scode
== SPU_EQ
)
881 /* Don't need to set a register with the result when we are
882 comparing against zero and branching. */
883 reverse_test
= !reverse_test
;
884 compare_result
= op0
;
888 compare_result
= gen_reg_rtx (comp_mode
);
897 if (spu_comp_icode
[index
][scode
] == 0)
900 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
902 op0
= force_reg (op_mode
, op0
);
903 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
905 op1
= force_reg (op_mode
, op1
);
906 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
910 emit_insn (comp_rtx
);
914 eq_result
= gen_reg_rtx (comp_mode
);
915 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
920 ior_code
= optab_handler (ior_optab
, comp_mode
);
921 gcc_assert (ior_code
!= CODE_FOR_nothing
);
922 emit_insn (GEN_FCN (ior_code
)
923 (compare_result
, compare_result
, eq_result
));
932 /* We don't have branch on QI compare insns, so we convert the
933 QI compare result to a HI result. */
934 if (comp_mode
== QImode
)
936 rtx old_res
= compare_result
;
937 compare_result
= gen_reg_rtx (HImode
);
939 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
943 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
945 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
947 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
948 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
949 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
952 else if (is_set
== 2)
954 rtx target
= operands
[0];
955 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
956 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
957 machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
959 rtx op_t
= operands
[2];
960 rtx op_f
= operands
[3];
962 /* The result of the comparison can be SI, HI or QI mode. Create a
963 mask based on that result. */
964 if (target_size
> compare_size
)
966 select_mask
= gen_reg_rtx (mode
);
967 emit_insn (gen_extend_compare (select_mask
, compare_result
));
969 else if (target_size
< compare_size
)
971 gen_rtx_SUBREG (mode
, compare_result
,
972 (compare_size
- target_size
) / BITS_PER_UNIT
);
973 else if (comp_mode
!= mode
)
974 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
976 select_mask
= compare_result
;
978 if (GET_MODE (target
) != GET_MODE (op_t
)
979 || GET_MODE (target
) != GET_MODE (op_f
))
983 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
985 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
989 rtx target
= operands
[0];
991 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
992 gen_rtx_NOT (comp_mode
, compare_result
)));
993 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
994 emit_insn (gen_extendhisi2 (target
, compare_result
));
995 else if (GET_MODE (target
) == SImode
996 && GET_MODE (compare_result
) == QImode
)
997 emit_insn (gen_extend_compare (target
, compare_result
));
999 emit_move_insn (target
, compare_result
);
1004 const_double_to_hwint (rtx x
)
1008 if (GET_MODE (x
) == SFmode
)
1010 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1011 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1013 else if (GET_MODE (x
) == DFmode
)
1016 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1017 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
1019 val
= (val
<< 32) | (l
[1] & 0xffffffff);
1027 hwint_to_const_double (machine_mode mode
, HOST_WIDE_INT v
)
1031 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1034 tv
[0] = (v
<< 32) >> 32;
1035 else if (mode
== DFmode
)
1037 tv
[1] = (v
<< 32) >> 32;
1040 real_from_target (&rv
, tv
, mode
);
1041 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
1045 print_operand_address (FILE * file
, register rtx addr
)
1050 if (GET_CODE (addr
) == AND
1051 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1052 && INTVAL (XEXP (addr
, 1)) == -16)
1053 addr
= XEXP (addr
, 0);
1055 switch (GET_CODE (addr
))
1058 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1062 reg
= XEXP (addr
, 0);
1063 offset
= XEXP (addr
, 1);
1064 if (GET_CODE (offset
) == REG
)
1066 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1067 reg_names
[REGNO (offset
)]);
1069 else if (GET_CODE (offset
) == CONST_INT
)
1071 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1072 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1082 output_addr_const (file
, addr
);
1092 print_operand (FILE * file
, rtx x
, int code
)
1094 machine_mode mode
= GET_MODE (x
);
1096 unsigned char arr
[16];
1097 int xcode
= GET_CODE (x
);
1099 if (GET_MODE (x
) == VOIDmode
)
1102 case 'L': /* 128 bits, signed */
1103 case 'm': /* 128 bits, signed */
1104 case 'T': /* 128 bits, signed */
1105 case 't': /* 128 bits, signed */
1108 case 'K': /* 64 bits, signed */
1109 case 'k': /* 64 bits, signed */
1110 case 'D': /* 64 bits, signed */
1111 case 'd': /* 64 bits, signed */
1114 case 'J': /* 32 bits, signed */
1115 case 'j': /* 32 bits, signed */
1116 case 's': /* 32 bits, signed */
1117 case 'S': /* 32 bits, signed */
1124 case 'j': /* 32 bits, signed */
1125 case 'k': /* 64 bits, signed */
1126 case 'm': /* 128 bits, signed */
1127 if (xcode
== CONST_INT
1128 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1130 gcc_assert (logical_immediate_p (x
, mode
));
1131 constant_to_array (mode
, x
, arr
);
1132 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1133 val
= trunc_int_for_mode (val
, SImode
);
1134 switch (which_logical_immediate (val
))
1139 fprintf (file
, "h");
1142 fprintf (file
, "b");
1152 case 'J': /* 32 bits, signed */
1153 case 'K': /* 64 bits, signed */
1154 case 'L': /* 128 bits, signed */
1155 if (xcode
== CONST_INT
1156 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1158 gcc_assert (logical_immediate_p (x
, mode
)
1159 || iohl_immediate_p (x
, mode
));
1160 constant_to_array (mode
, x
, arr
);
1161 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1162 val
= trunc_int_for_mode (val
, SImode
);
1163 switch (which_logical_immediate (val
))
1169 val
= trunc_int_for_mode (val
, HImode
);
1172 val
= trunc_int_for_mode (val
, QImode
);
1177 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1183 case 't': /* 128 bits, signed */
1184 case 'd': /* 64 bits, signed */
1185 case 's': /* 32 bits, signed */
1188 enum immediate_class c
= classify_immediate (x
, mode
);
1192 constant_to_array (mode
, x
, arr
);
1193 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1194 val
= trunc_int_for_mode (val
, SImode
);
1195 switch (which_immediate_load (val
))
1200 fprintf (file
, "a");
1203 fprintf (file
, "h");
1206 fprintf (file
, "hu");
1213 constant_to_array (mode
, x
, arr
);
1214 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1216 fprintf (file
, "b");
1218 fprintf (file
, "h");
1220 fprintf (file
, "w");
1222 fprintf (file
, "d");
1225 if (xcode
== CONST_VECTOR
)
1227 x
= CONST_VECTOR_ELT (x
, 0);
1228 xcode
= GET_CODE (x
);
1230 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1231 fprintf (file
, "a");
1232 else if (xcode
== HIGH
)
1233 fprintf (file
, "hu");
1247 case 'T': /* 128 bits, signed */
1248 case 'D': /* 64 bits, signed */
1249 case 'S': /* 32 bits, signed */
1252 enum immediate_class c
= classify_immediate (x
, mode
);
1256 constant_to_array (mode
, x
, arr
);
1257 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1258 val
= trunc_int_for_mode (val
, SImode
);
1259 switch (which_immediate_load (val
))
1266 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1271 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1274 constant_to_array (mode
, x
, arr
);
1276 for (i
= 0; i
< 16; i
++)
1281 print_operand (file
, GEN_INT (val
), 0);
1284 constant_to_array (mode
, x
, arr
);
1285 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1286 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1291 if (GET_CODE (x
) == CONST_VECTOR
)
1292 x
= CONST_VECTOR_ELT (x
, 0);
1293 output_addr_const (file
, x
);
1295 fprintf (file
, "@h");
1309 if (xcode
== CONST_INT
)
1311 /* Only 4 least significant bits are relevant for generate
1312 control word instructions. */
1313 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1318 case 'M': /* print code for c*d */
1319 if (GET_CODE (x
) == CONST_INT
)
1323 fprintf (file
, "b");
1326 fprintf (file
, "h");
1329 fprintf (file
, "w");
1332 fprintf (file
, "d");
1341 case 'N': /* Negate the operand */
1342 if (xcode
== CONST_INT
)
1343 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1344 else if (xcode
== CONST_VECTOR
)
1345 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1346 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1349 case 'I': /* enable/disable interrupts */
1350 if (xcode
== CONST_INT
)
1351 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1354 case 'b': /* branch modifiers */
1356 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1357 else if (COMPARISON_P (x
))
1358 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1361 case 'i': /* indirect call */
1364 if (GET_CODE (XEXP (x
, 0)) == REG
)
1365 /* Used in indirect function calls. */
1366 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1368 output_address (XEXP (x
, 0));
1372 case 'p': /* load/store */
1376 xcode
= GET_CODE (x
);
1381 xcode
= GET_CODE (x
);
1384 fprintf (file
, "d");
1385 else if (xcode
== CONST_INT
)
1386 fprintf (file
, "a");
1387 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1388 fprintf (file
, "r");
1389 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1391 if (GET_CODE (XEXP (x
, 1)) == REG
)
1392 fprintf (file
, "x");
1394 fprintf (file
, "d");
1399 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1401 output_addr_const (file
, GEN_INT (val
));
1405 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1407 output_addr_const (file
, GEN_INT (val
));
1411 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1413 output_addr_const (file
, GEN_INT (val
));
1417 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1418 val
= (val
>> 3) & 0x1f;
1419 output_addr_const (file
, GEN_INT (val
));
1423 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1426 output_addr_const (file
, GEN_INT (val
));
1430 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1433 output_addr_const (file
, GEN_INT (val
));
1437 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1440 output_addr_const (file
, GEN_INT (val
));
1444 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1445 val
= -(val
& -8ll);
1446 val
= (val
>> 3) & 0x1f;
1447 output_addr_const (file
, GEN_INT (val
));
1452 constant_to_array (mode
, x
, arr
);
1453 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1454 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1459 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1460 else if (xcode
== MEM
)
1461 output_address (XEXP (x
, 0));
1462 else if (xcode
== CONST_VECTOR
)
1463 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1465 output_addr_const (file
, x
);
1472 output_operand_lossage ("invalid %%xn code");
1477 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1478 caller saved register. For leaf functions it is more efficient to
1479 use a volatile register because we won't need to save and restore the
1480 pic register. This routine is only valid after register allocation
1481 is completed, so we can pick an unused register. */
1485 if (!reload_completed
&& !reload_in_progress
)
1488 /* If we've already made the decision, we need to keep with it. Once we've
1489 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1490 return true since the register is now live; this should not cause us to
1491 "switch back" to using pic_offset_table_rtx. */
1492 if (!cfun
->machine
->pic_reg
)
1494 if (crtl
->is_leaf
&& !df_regs_ever_live_p (LAST_ARG_REGNUM
))
1495 cfun
->machine
->pic_reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
1497 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1500 return cfun
->machine
->pic_reg
;
1503 /* Split constant addresses to handle cases that are too large.
1504 Add in the pic register when in PIC mode.
1505 Split immediates that require more than 1 instruction. */
1507 spu_split_immediate (rtx
* ops
)
1509 machine_mode mode
= GET_MODE (ops
[0]);
1510 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1516 unsigned char arrhi
[16];
1517 unsigned char arrlo
[16];
1518 rtx to
, temp
, hi
, lo
;
1520 machine_mode imode
= mode
;
1521 /* We need to do reals as ints because the constant used in the
1522 IOR might not be a legitimate real constant. */
1523 imode
= int_mode_for_mode (mode
);
1524 constant_to_array (mode
, ops
[1], arrhi
);
1526 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1529 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1530 for (i
= 0; i
< 16; i
+= 4)
1532 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1533 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1534 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1535 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1537 hi
= array_to_constant (imode
, arrhi
);
1538 lo
= array_to_constant (imode
, arrlo
);
1539 emit_move_insn (temp
, hi
);
1540 emit_insn (gen_rtx_SET
1541 (VOIDmode
, to
, gen_rtx_IOR (imode
, temp
, lo
)));
1546 unsigned char arr_fsmbi
[16];
1547 unsigned char arr_andbi
[16];
1548 rtx to
, reg_fsmbi
, reg_and
;
1550 machine_mode imode
= mode
;
1551 /* We need to do reals as ints because the constant used in the
1552 * AND might not be a legitimate real constant. */
1553 imode
= int_mode_for_mode (mode
);
1554 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1556 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1559 for (i
= 0; i
< 16; i
++)
1560 if (arr_fsmbi
[i
] != 0)
1562 arr_andbi
[0] = arr_fsmbi
[i
];
1563 arr_fsmbi
[i
] = 0xff;
1565 for (i
= 1; i
< 16; i
++)
1566 arr_andbi
[i
] = arr_andbi
[0];
1567 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1568 reg_and
= array_to_constant (imode
, arr_andbi
);
1569 emit_move_insn (to
, reg_fsmbi
);
1570 emit_insn (gen_rtx_SET
1571 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1575 if (reload_in_progress
|| reload_completed
)
1577 rtx mem
= force_const_mem (mode
, ops
[1]);
1578 if (TARGET_LARGE_MEM
)
1580 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1581 emit_move_insn (addr
, XEXP (mem
, 0));
1582 mem
= replace_equiv_address (mem
, addr
);
1584 emit_move_insn (ops
[0], mem
);
1590 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1594 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1595 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1598 emit_insn (gen_pic (ops
[0], ops
[1]));
1601 rtx pic_reg
= get_pic_reg ();
1602 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1604 return flag_pic
|| c
== IC_IL2s
;
1615 /* SAVING is TRUE when we are generating the actual load and store
1616 instructions for REGNO. When determining the size of the stack
1617 needed for saving register we must allocate enough space for the
1618 worst case, because we don't always have the information early enough
1619 to not allocate it. But we can at least eliminate the actual loads
1620 and stores during the prologue/epilogue. */
1622 need_to_save_reg (int regno
, int saving
)
1624 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1627 && regno
== PIC_OFFSET_TABLE_REGNUM
1628 && (!saving
|| cfun
->machine
->pic_reg
== pic_offset_table_rtx
))
1633 /* This function is only correct starting with local register
1636 spu_saved_regs_size (void)
1638 int reg_save_size
= 0;
1641 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1642 if (need_to_save_reg (regno
, 0))
1643 reg_save_size
+= 0x10;
1644 return reg_save_size
;
1648 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1650 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1652 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1653 return emit_insn (gen_movv4si (mem
, reg
));
1657 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1659 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1661 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1662 return emit_insn (gen_movv4si (reg
, mem
));
1665 /* This happens after reload, so we need to expand it. */
1667 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1670 if (satisfies_constraint_K (GEN_INT (imm
)))
1672 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1676 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1677 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1678 if (REGNO (src
) == REGNO (scratch
))
1684 /* Return nonzero if this function is known to have a null epilogue. */
1687 direct_return (void)
1689 if (reload_completed
)
1691 if (cfun
->static_chain_decl
== 0
1692 && (spu_saved_regs_size ()
1694 + crtl
->outgoing_args_size
1695 + crtl
->args
.pretend_args_size
== 0)
1703 The stack frame looks like this:
1707 AP -> +-------------+
1710 prev SP | back chain |
1713 | reg save | crtl->args.pretend_args_size bytes
1716 | saved regs | spu_saved_regs_size() bytes
1717 FP -> +-------------+
1719 | vars | get_frame_size() bytes
1720 HFP -> +-------------+
1723 | args | crtl->outgoing_args_size bytes
1729 SP -> +-------------+
1733 spu_expand_prologue (void)
1735 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1736 HOST_WIDE_INT total_size
;
1737 HOST_WIDE_INT saved_regs_size
;
1738 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1739 rtx scratch_reg_0
, scratch_reg_1
;
1743 if (flag_pic
&& optimize
== 0 && !cfun
->machine
->pic_reg
)
1744 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1746 if (spu_naked_function_p (current_function_decl
))
1749 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1750 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1752 saved_regs_size
= spu_saved_regs_size ();
1753 total_size
= size
+ saved_regs_size
1754 + crtl
->outgoing_args_size
1755 + crtl
->args
.pretend_args_size
;
1758 || cfun
->calls_alloca
|| total_size
> 0)
1759 total_size
+= STACK_POINTER_OFFSET
;
1761 /* Save this first because code after this might use the link
1762 register as a scratch register. */
1765 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1766 RTX_FRAME_RELATED_P (insn
) = 1;
1771 offset
= -crtl
->args
.pretend_args_size
;
1772 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1773 if (need_to_save_reg (regno
, 1))
1776 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1777 RTX_FRAME_RELATED_P (insn
) = 1;
1781 if (flag_pic
&& cfun
->machine
->pic_reg
)
1783 rtx pic_reg
= cfun
->machine
->pic_reg
;
1784 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1785 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1790 if (flag_stack_check
)
1792 /* We compare against total_size-1 because
1793 ($sp >= total_size) <=> ($sp > total_size-1) */
1794 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1795 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1796 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1797 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1799 emit_move_insn (scratch_v4si
, size_v4si
);
1800 size_v4si
= scratch_v4si
;
1802 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1803 emit_insn (gen_vec_extractv4si
1804 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1805 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1808 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1809 the value of the previous $sp because we save it as the back
1811 if (total_size
<= 2000)
1813 /* In this case we save the back chain first. */
1814 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1816 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1820 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1822 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1824 RTX_FRAME_RELATED_P (insn
) = 1;
1825 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1826 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1828 if (total_size
> 2000)
1830 /* Save the back chain ptr */
1831 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1834 if (frame_pointer_needed
)
1836 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1837 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1838 + crtl
->outgoing_args_size
;
1839 /* Set the new frame_pointer */
1840 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1841 RTX_FRAME_RELATED_P (insn
) = 1;
1842 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1843 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1844 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1848 if (flag_stack_usage_info
)
1849 current_function_static_stack_size
= total_size
;
1853 spu_expand_epilogue (bool sibcall_p
)
1855 int size
= get_frame_size (), offset
, regno
;
1856 HOST_WIDE_INT saved_regs_size
, total_size
;
1857 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1860 if (spu_naked_function_p (current_function_decl
))
1863 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1865 saved_regs_size
= spu_saved_regs_size ();
1866 total_size
= size
+ saved_regs_size
1867 + crtl
->outgoing_args_size
1868 + crtl
->args
.pretend_args_size
;
1871 || cfun
->calls_alloca
|| total_size
> 0)
1872 total_size
+= STACK_POINTER_OFFSET
;
1876 if (cfun
->calls_alloca
)
1877 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1879 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1882 if (saved_regs_size
> 0)
1884 offset
= -crtl
->args
.pretend_args_size
;
1885 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1886 if (need_to_save_reg (regno
, 1))
1889 frame_emit_load (regno
, sp_reg
, offset
);
1895 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1899 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
1900 emit_jump_insn (gen__return ());
1905 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1909 /* This is inefficient because it ends up copying to a save-register
1910 which then gets saved even though $lr has already been saved. But
1911 it does generate better code for leaf functions and we don't need
1912 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1913 used for __builtin_return_address anyway, so maybe we don't care if
1914 it's inefficient. */
1915 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
1919 /* Given VAL, generate a constant appropriate for MODE.
1920 If MODE is a vector mode, every element will be VAL.
1921 For TImode, VAL will be zero extended to 128 bits. */
1923 spu_const (machine_mode mode
, HOST_WIDE_INT val
)
1929 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
1930 || GET_MODE_CLASS (mode
) == MODE_FLOAT
1931 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1932 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1934 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1935 return immed_double_const (val
, 0, mode
);
1937 /* val is the bit representation of the float */
1938 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1939 return hwint_to_const_double (mode
, val
);
1941 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
1942 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
1944 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
1946 units
= GET_MODE_NUNITS (mode
);
1948 v
= rtvec_alloc (units
);
1950 for (i
= 0; i
< units
; ++i
)
1951 RTVEC_ELT (v
, i
) = inner
;
1953 return gen_rtx_CONST_VECTOR (mode
, v
);
1956 /* Create a MODE vector constant from 4 ints. */
1958 spu_const_from_ints(machine_mode mode
, int a
, int b
, int c
, int d
)
1960 unsigned char arr
[16];
1961 arr
[0] = (a
>> 24) & 0xff;
1962 arr
[1] = (a
>> 16) & 0xff;
1963 arr
[2] = (a
>> 8) & 0xff;
1964 arr
[3] = (a
>> 0) & 0xff;
1965 arr
[4] = (b
>> 24) & 0xff;
1966 arr
[5] = (b
>> 16) & 0xff;
1967 arr
[6] = (b
>> 8) & 0xff;
1968 arr
[7] = (b
>> 0) & 0xff;
1969 arr
[8] = (c
>> 24) & 0xff;
1970 arr
[9] = (c
>> 16) & 0xff;
1971 arr
[10] = (c
>> 8) & 0xff;
1972 arr
[11] = (c
>> 0) & 0xff;
1973 arr
[12] = (d
>> 24) & 0xff;
1974 arr
[13] = (d
>> 16) & 0xff;
1975 arr
[14] = (d
>> 8) & 0xff;
1976 arr
[15] = (d
>> 0) & 0xff;
1977 return array_to_constant(mode
, arr
);
1980 /* branch hint stuff */
1982 /* An array of these is used to propagate hints to predecessor blocks. */
1985 rtx_insn
*prop_jump
; /* propagated from another block */
1986 int bb_index
; /* the original block. */
1988 static struct spu_bb_info
*spu_bb_info
;
1990 #define STOP_HINT_P(INSN) \
1992 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1993 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1995 /* 1 when RTX is a hinted branch or its target. We keep track of
1996 what has been hinted so the safe-hint code can test it easily. */
1997 #define HINTED_P(RTX) \
1998 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2000 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2001 #define SCHED_ON_EVEN_P(RTX) \
2002 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2004 /* Emit a nop for INSN such that the two will dual issue. This assumes
2005 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2006 We check for TImode to handle a MULTI1 insn which has dual issued its
2007 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
2009 emit_nop_for_insn (rtx_insn
*insn
)
2014 /* We need to handle JUMP_TABLE_DATA separately. */
2015 if (JUMP_TABLE_DATA_P (insn
))
2017 new_insn
= emit_insn_after (gen_lnop(), insn
);
2018 recog_memoized (new_insn
);
2019 INSN_LOCATION (new_insn
) = UNKNOWN_LOCATION
;
2023 p
= get_pipe (insn
);
2024 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2025 new_insn
= emit_insn_after (gen_lnop (), insn
);
2026 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2028 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2029 PUT_MODE (new_insn
, TImode
);
2030 PUT_MODE (insn
, VOIDmode
);
2033 new_insn
= emit_insn_after (gen_lnop (), insn
);
2034 recog_memoized (new_insn
);
2035 INSN_LOCATION (new_insn
) = INSN_LOCATION (insn
);
2038 /* Insert nops in basic blocks to meet dual issue alignment
2039 requirements. Also make sure hbrp and hint instructions are at least
2040 one cycle apart, possibly inserting a nop. */
2044 rtx_insn
*insn
, *next_insn
, *prev_insn
, *hbr_insn
= 0;
2048 /* This sets up INSN_ADDRESSES. */
2049 shorten_branches (get_insns ());
2051 /* Keep track of length added by nops. */
2055 insn
= get_insns ();
2056 if (!active_insn_p (insn
))
2057 insn
= next_active_insn (insn
);
2058 for (; insn
; insn
= next_insn
)
2060 next_insn
= next_active_insn (insn
);
2061 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2062 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2066 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2067 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2068 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2071 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2072 PUT_MODE (prev_insn
, GET_MODE (insn
));
2073 PUT_MODE (insn
, TImode
);
2074 INSN_LOCATION (prev_insn
) = INSN_LOCATION (insn
);
2080 if (INSN_CODE (insn
) == CODE_FOR_blockage
&& next_insn
)
2082 if (GET_MODE (insn
) == TImode
)
2083 PUT_MODE (next_insn
, TImode
);
2085 next_insn
= next_active_insn (insn
);
2087 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2088 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2090 if (((addr
+ length
) & 7) != 0)
2092 emit_nop_for_insn (prev_insn
);
2096 else if (GET_MODE (insn
) == TImode
2097 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2098 || get_attr_type (insn
) == TYPE_MULTI0
)
2099 && ((addr
+ length
) & 7) != 0)
2101 /* prev_insn will always be set because the first insn is
2102 always 8-byte aligned. */
2103 emit_nop_for_insn (prev_insn
);
2111 /* Routines for branch hints. */
2114 spu_emit_branch_hint (rtx_insn
*before
, rtx_insn
*branch
, rtx target
,
2115 int distance
, sbitmap blocks
)
2117 rtx branch_label
= 0;
2120 rtx_jump_table_data
*table
;
2122 if (before
== 0 || branch
== 0 || target
== 0)
2125 /* While scheduling we require hints to be no further than 600, so
2126 we need to enforce that here too */
2130 /* If we have a Basic block note, emit it after the basic block note. */
2131 if (NOTE_INSN_BASIC_BLOCK_P (before
))
2132 before
= NEXT_INSN (before
);
2134 branch_label
= gen_label_rtx ();
2135 LABEL_NUSES (branch_label
)++;
2136 LABEL_PRESERVE_P (branch_label
) = 1;
2137 insn
= emit_label_before (branch_label
, branch
);
2138 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2139 bitmap_set_bit (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2141 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2142 recog_memoized (hint
);
2143 INSN_LOCATION (hint
) = INSN_LOCATION (branch
);
2144 HINTED_P (branch
) = 1;
2146 if (GET_CODE (target
) == LABEL_REF
)
2147 HINTED_P (XEXP (target
, 0)) = 1;
2148 else if (tablejump_p (branch
, 0, &table
))
2152 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2153 vec
= XVEC (PATTERN (table
), 0);
2155 vec
= XVEC (PATTERN (table
), 1);
2156 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2157 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2160 if (distance
>= 588)
2162 /* Make sure the hint isn't scheduled any earlier than this point,
2163 which could make it too far for the branch offest to fit */
2164 insn
= emit_insn_before (gen_blockage (), hint
);
2165 recog_memoized (insn
);
2166 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2168 else if (distance
<= 8 * 4)
2170 /* To guarantee at least 8 insns between the hint and branch we
2173 for (d
= distance
; d
< 8 * 4; d
+= 4)
2176 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2177 recog_memoized (insn
);
2178 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2181 /* Make sure any nops inserted aren't scheduled before the hint. */
2182 insn
= emit_insn_after (gen_blockage (), hint
);
2183 recog_memoized (insn
);
2184 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2186 /* Make sure any nops inserted aren't scheduled after the call. */
2187 if (CALL_P (branch
) && distance
< 8 * 4)
2189 insn
= emit_insn_before (gen_blockage (), branch
);
2190 recog_memoized (insn
);
2191 INSN_LOCATION (insn
) = INSN_LOCATION (branch
);
2196 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2197 the rtx for the branch target. */
2199 get_branch_target (rtx_insn
*branch
)
2201 if (JUMP_P (branch
))
2205 /* Return statements */
2206 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2207 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2210 if (extract_asm_operands (PATTERN (branch
)) != NULL
)
2213 set
= single_set (branch
);
2214 src
= SET_SRC (set
);
2215 if (GET_CODE (SET_DEST (set
)) != PC
)
2218 if (GET_CODE (src
) == IF_THEN_ELSE
)
2221 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2224 /* If the more probable case is not a fall through, then
2225 try a branch hint. */
2226 int prob
= XINT (note
, 0);
2227 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2228 && GET_CODE (XEXP (src
, 1)) != PC
)
2229 lab
= XEXP (src
, 1);
2230 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2231 && GET_CODE (XEXP (src
, 2)) != PC
)
2232 lab
= XEXP (src
, 2);
2236 if (GET_CODE (lab
) == RETURN
)
2237 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2245 else if (CALL_P (branch
))
2248 /* All of our call patterns are in a PARALLEL and the CALL is
2249 the first pattern in the PARALLEL. */
2250 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2252 call
= XVECEXP (PATTERN (branch
), 0, 0);
2253 if (GET_CODE (call
) == SET
)
2254 call
= SET_SRC (call
);
2255 if (GET_CODE (call
) != CALL
)
2257 return XEXP (XEXP (call
, 0), 0);
2262 /* The special $hbr register is used to prevent the insn scheduler from
2263 moving hbr insns across instructions which invalidate them. It
2264 should only be used in a clobber, and this function searches for
2265 insns which clobber it. */
2267 insn_clobbers_hbr (rtx_insn
*insn
)
2270 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2272 rtx parallel
= PATTERN (insn
);
2275 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2277 clobber
= XVECEXP (parallel
, 0, j
);
2278 if (GET_CODE (clobber
) == CLOBBER
2279 && GET_CODE (XEXP (clobber
, 0)) == REG
2280 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2287 /* Search up to 32 insns starting at FIRST:
2288 - at any kind of hinted branch, just return
2289 - at any unconditional branch in the first 15 insns, just return
2290 - at a call or indirect branch, after the first 15 insns, force it to
2291 an even address and return
2292 - at any unconditional branch, after the first 15 insns, force it to
2294 At then end of the search, insert an hbrp within 4 insns of FIRST,
2295 and an hbrp within 16 instructions of FIRST.
2298 insert_hbrp_for_ilb_runout (rtx_insn
*first
)
2300 rtx_insn
*insn
, *before_4
= 0, *before_16
= 0;
2301 int addr
= 0, length
, first_addr
= -1;
2302 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2303 int insert_lnop_after
= 0;
2304 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2307 if (first_addr
== -1)
2308 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2309 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2310 length
= get_attr_length (insn
);
2312 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2314 /* We test for 14 instructions because the first hbrp will add
2315 up to 2 instructions. */
2316 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2319 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2321 /* Make sure an hbrp is at least 2 cycles away from a hint.
2322 Insert an lnop after the hbrp when necessary. */
2323 if (before_4
== 0 && addr
> 0)
2326 insert_lnop_after
|= 1;
2328 else if (before_4
&& addr
<= 4 * 4)
2329 insert_lnop_after
|= 1;
2330 if (before_16
== 0 && addr
> 10 * 4)
2333 insert_lnop_after
|= 2;
2335 else if (before_16
&& addr
<= 14 * 4)
2336 insert_lnop_after
|= 2;
2339 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2341 if (addr
< hbrp_addr0
)
2343 else if (addr
< hbrp_addr1
)
2347 if (CALL_P (insn
) || JUMP_P (insn
))
2349 if (HINTED_P (insn
))
2352 /* Any branch after the first 15 insns should be on an even
2353 address to avoid a special case branch. There might be
2354 some nops and/or hbrps inserted, so we test after 10
2357 SCHED_ON_EVEN_P (insn
) = 1;
2360 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2364 if (addr
+ length
>= 32 * 4)
2366 gcc_assert (before_4
&& before_16
);
2367 if (hbrp_addr0
> 4 * 4)
2370 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2371 recog_memoized (insn
);
2372 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2373 INSN_ADDRESSES_NEW (insn
,
2374 INSN_ADDRESSES (INSN_UID (before_4
)));
2375 PUT_MODE (insn
, GET_MODE (before_4
));
2376 PUT_MODE (before_4
, TImode
);
2377 if (insert_lnop_after
& 1)
2379 insn
= emit_insn_before (gen_lnop (), before_4
);
2380 recog_memoized (insn
);
2381 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2382 INSN_ADDRESSES_NEW (insn
,
2383 INSN_ADDRESSES (INSN_UID (before_4
)));
2384 PUT_MODE (insn
, TImode
);
2387 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2388 && hbrp_addr1
> 16 * 4)
2391 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2392 recog_memoized (insn
);
2393 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2394 INSN_ADDRESSES_NEW (insn
,
2395 INSN_ADDRESSES (INSN_UID (before_16
)));
2396 PUT_MODE (insn
, GET_MODE (before_16
));
2397 PUT_MODE (before_16
, TImode
);
2398 if (insert_lnop_after
& 2)
2400 insn
= emit_insn_before (gen_lnop (), before_16
);
2401 recog_memoized (insn
);
2402 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2403 INSN_ADDRESSES_NEW (insn
,
2404 INSN_ADDRESSES (INSN_UID
2406 PUT_MODE (insn
, TImode
);
2412 else if (BARRIER_P (insn
))
2417 /* The SPU might hang when it executes 48 inline instructions after a
2418 hinted branch jumps to its hinted target. The beginning of a
2419 function and the return from a call might have been hinted, and
2420 must be handled as well. To prevent a hang we insert 2 hbrps. The
2421 first should be within 6 insns of the branch target. The second
2422 should be within 22 insns of the branch target. When determining
2423 if hbrps are necessary, we look for only 32 inline instructions,
2424 because up to 12 nops and 4 hbrps could be inserted. Similarily,
2425 when inserting new hbrps, we insert them within 4 and 16 insns of
2431 if (TARGET_SAFE_HINTS
)
2433 shorten_branches (get_insns ());
2434 /* Insert hbrp at beginning of function */
2435 insn
= next_active_insn (get_insns ());
2437 insert_hbrp_for_ilb_runout (insn
);
2438 /* Insert hbrp after hinted targets. */
2439 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2440 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2441 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2445 static int in_spu_reorg
;
2448 spu_var_tracking (void)
2450 if (flag_var_tracking
)
2453 timevar_push (TV_VAR_TRACKING
);
2454 variable_tracking_main ();
2455 timevar_pop (TV_VAR_TRACKING
);
2456 df_finish_pass (false);
2460 /* Insert branch hints. There are no branch optimizations after this
2461 pass, so it's safe to set our branch hints now. */
2463 spu_machine_dependent_reorg (void)
2467 rtx_insn
*branch
, *insn
;
2468 rtx branch_target
= 0;
2469 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2473 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2475 /* We still do it for unoptimized code because an external
2476 function might have hinted a call or return. */
2477 compute_bb_for_insn ();
2480 spu_var_tracking ();
2481 free_bb_for_insn ();
2485 blocks
= sbitmap_alloc (last_basic_block_for_fn (cfun
));
2486 bitmap_clear (blocks
);
2489 compute_bb_for_insn ();
2491 /* (Re-)discover loops so that bb->loop_father can be used
2492 in the analysis below. */
2493 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
2498 (struct spu_bb_info
*) xcalloc (n_basic_blocks_for_fn (cfun
),
2499 sizeof (struct spu_bb_info
));
2501 /* We need exact insn addresses and lengths. */
2502 shorten_branches (get_insns ());
2504 for (i
= n_basic_blocks_for_fn (cfun
) - 1; i
>= 0; i
--)
2506 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2508 if (spu_bb_info
[i
].prop_jump
)
2510 branch
= spu_bb_info
[i
].prop_jump
;
2511 branch_target
= get_branch_target (branch
);
2512 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2513 required_dist
= spu_hint_dist
;
2515 /* Search from end of a block to beginning. In this loop, find
2516 jumps which need a branch and emit them only when:
2517 - it's an indirect branch and we're at the insn which sets
2519 - we're at an insn that will invalidate the hint. e.g., a
2520 call, another hint insn, inline asm that clobbers $hbr, and
2521 some inlined operations (divmodsi4). Don't consider jumps
2522 because they are only at the end of a block and are
2523 considered when we are deciding whether to propagate
2524 - we're getting too far away from the branch. The hbr insns
2525 only have a signed 10 bit offset
2526 We go back as far as possible so the branch will be considered
2527 for propagation when we get to the beginning of the block. */
2528 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2532 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2534 && ((GET_CODE (branch_target
) == REG
2535 && set_of (branch_target
, insn
) != NULL_RTX
)
2536 || insn_clobbers_hbr (insn
)
2537 || branch_addr
- insn_addr
> 600))
2539 rtx_insn
*next
= NEXT_INSN (insn
);
2540 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2541 if (insn
!= BB_END (bb
)
2542 && branch_addr
- next_addr
>= required_dist
)
2546 "hint for %i in block %i before %i\n",
2547 INSN_UID (branch
), bb
->index
,
2549 spu_emit_branch_hint (next
, branch
, branch_target
,
2550 branch_addr
- next_addr
, blocks
);
2555 /* JUMP_P will only be true at the end of a block. When
2556 branch is already set it means we've previously decided
2557 to propagate a hint for that branch into this block. */
2558 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2561 if ((branch_target
= get_branch_target (insn
)))
2564 branch_addr
= insn_addr
;
2565 required_dist
= spu_hint_dist
;
2569 if (insn
== BB_HEAD (bb
))
2575 /* If we haven't emitted a hint for this branch yet, it might
2576 be profitable to emit it in one of the predecessor blocks,
2577 especially for loops. */
2579 basic_block prev
= 0, prop
= 0, prev2
= 0;
2580 int loop_exit
= 0, simple_loop
= 0;
2581 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2583 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2584 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2585 prev
= EDGE_PRED (bb
, j
)->src
;
2587 prev2
= EDGE_PRED (bb
, j
)->src
;
2589 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2590 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2592 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2595 /* If this branch is a loop exit then propagate to previous
2596 fallthru block. This catches the cases when it is a simple
2597 loop or when there is an initial branch into the loop. */
2598 if (prev
&& (loop_exit
|| simple_loop
)
2599 && bb_loop_depth (prev
) <= bb_loop_depth (bb
))
2602 /* If there is only one adjacent predecessor. Don't propagate
2603 outside this loop. */
2604 else if (prev
&& single_pred_p (bb
)
2605 && prev
->loop_father
== bb
->loop_father
)
2608 /* If this is the JOIN block of a simple IF-THEN then
2609 propagate the hint to the HEADER block. */
2610 else if (prev
&& prev2
2611 && EDGE_COUNT (bb
->preds
) == 2
2612 && EDGE_COUNT (prev
->preds
) == 1
2613 && EDGE_PRED (prev
, 0)->src
== prev2
2614 && prev2
->loop_father
== bb
->loop_father
2615 && GET_CODE (branch_target
) != REG
)
2618 /* Don't propagate when:
2619 - this is a simple loop and the hint would be too far
2620 - this is not a simple loop and there are 16 insns in
2622 - the predecessor block ends in a branch that will be
2624 - the predecessor block ends in an insn that invalidates
2628 && (bbend
= BB_END (prop
))
2629 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2630 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2631 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2634 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2635 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2636 bb
->index
, prop
->index
, bb_loop_depth (bb
),
2637 INSN_UID (branch
), loop_exit
, simple_loop
,
2638 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2640 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2641 spu_bb_info
[prop
->index
].bb_index
= i
;
2643 else if (branch_addr
- next_addr
>= required_dist
)
2646 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2647 INSN_UID (branch
), bb
->index
,
2648 INSN_UID (NEXT_INSN (insn
)));
2649 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2650 branch_addr
- next_addr
, blocks
);
2657 if (!bitmap_empty_p (blocks
))
2658 find_many_sub_basic_blocks (blocks
);
2660 /* We have to schedule to make sure alignment is ok. */
2661 FOR_EACH_BB_FN (bb
, cfun
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2663 /* The hints need to be scheduled, so call it again. */
2665 df_finish_pass (true);
2671 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2672 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2674 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2675 between its branch label and the branch . We don't move the
2676 label because GCC expects it at the beginning of the block. */
2677 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2678 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2679 rtx_insn
*label
= as_a
<rtx_insn
*> (XEXP (label_ref
, 0));
2682 for (branch
= NEXT_INSN (label
);
2683 !JUMP_P (branch
) && !CALL_P (branch
);
2684 branch
= NEXT_INSN (branch
))
2685 if (NONJUMP_INSN_P (branch
))
2686 offset
+= get_attr_length (branch
);
2688 XVECEXP (unspec
, 0, 0) = plus_constant (Pmode
, label_ref
, offset
);
2691 spu_var_tracking ();
2693 loop_optimizer_finalize ();
2695 free_bb_for_insn ();
2701 /* Insn scheduling routines, primarily for dual issue. */
2703 spu_sched_issue_rate (void)
2709 uses_ls_unit(rtx_insn
*insn
)
2711 rtx set
= single_set (insn
);
2713 && (GET_CODE (SET_DEST (set
)) == MEM
2714 || GET_CODE (SET_SRC (set
)) == MEM
))
2720 get_pipe (rtx_insn
*insn
)
2723 /* Handle inline asm */
2724 if (INSN_CODE (insn
) == -1)
2726 t
= get_attr_type (insn
);
2751 case TYPE_IPREFETCH
:
2759 /* haifa-sched.c has a static variable that keeps track of the current
2760 cycle. It is passed to spu_sched_reorder, and we record it here for
2761 use by spu_sched_variable_issue. It won't be accurate if the
2762 scheduler updates it's clock_var between the two calls. */
2763 static int clock_var
;
2765 /* This is used to keep track of insn alignment. Set to 0 at the
2766 beginning of each block and increased by the "length" attr of each
2768 static int spu_sched_length
;
2770 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2771 ready list appropriately in spu_sched_reorder(). */
2772 static int pipe0_clock
;
2773 static int pipe1_clock
;
2775 static int prev_clock_var
;
2777 static int prev_priority
;
2779 /* The SPU needs to load the next ilb sometime during the execution of
2780 the previous ilb. There is a potential conflict if every cycle has a
2781 load or store. To avoid the conflict we make sure the load/store
2782 unit is free for at least one cycle during the execution of insns in
2783 the previous ilb. */
2784 static int spu_ls_first
;
2785 static int prev_ls_clock
;
2788 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2789 int max_ready ATTRIBUTE_UNUSED
)
2791 spu_sched_length
= 0;
2795 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2796 int max_ready ATTRIBUTE_UNUSED
)
2798 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2800 /* When any block might be at least 8-byte aligned, assume they
2801 will all be at least 8-byte aligned to make sure dual issue
2802 works out correctly. */
2803 spu_sched_length
= 0;
2805 spu_ls_first
= INT_MAX
;
2810 prev_clock_var
= -1;
2815 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2816 int verbose ATTRIBUTE_UNUSED
,
2817 rtx_insn
*insn
, int more
)
2821 if (GET_CODE (PATTERN (insn
)) == USE
2822 || GET_CODE (PATTERN (insn
)) == CLOBBER
2823 || (len
= get_attr_length (insn
)) == 0)
2826 spu_sched_length
+= len
;
2828 /* Reset on inline asm */
2829 if (INSN_CODE (insn
) == -1)
2831 spu_ls_first
= INT_MAX
;
2836 p
= get_pipe (insn
);
2838 pipe0_clock
= clock_var
;
2840 pipe1_clock
= clock_var
;
2844 if (clock_var
- prev_ls_clock
> 1
2845 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2846 spu_ls_first
= INT_MAX
;
2847 if (uses_ls_unit (insn
))
2849 if (spu_ls_first
== INT_MAX
)
2850 spu_ls_first
= spu_sched_length
;
2851 prev_ls_clock
= clock_var
;
2854 /* The scheduler hasn't inserted the nop, but we will later on.
2855 Include those nops in spu_sched_length. */
2856 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2857 spu_sched_length
+= 4;
2858 prev_clock_var
= clock_var
;
2860 /* more is -1 when called from spu_sched_reorder for new insns
2861 that don't have INSN_PRIORITY */
2863 prev_priority
= INSN_PRIORITY (insn
);
2866 /* Always try issuing more insns. spu_sched_reorder will decide
2867 when the cycle should be advanced. */
2871 /* This function is called for both TARGET_SCHED_REORDER and
2872 TARGET_SCHED_REORDER2. */
2874 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2875 rtx_insn
**ready
, int *nreadyp
, int clock
)
2877 int i
, nready
= *nreadyp
;
2878 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
2883 if (nready
<= 0 || pipe1_clock
>= clock
)
2886 /* Find any rtl insns that don't generate assembly insns and schedule
2888 for (i
= nready
- 1; i
>= 0; i
--)
2891 if (INSN_CODE (insn
) == -1
2892 || INSN_CODE (insn
) == CODE_FOR_blockage
2893 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
2895 ready
[i
] = ready
[nready
- 1];
2896 ready
[nready
- 1] = insn
;
2901 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
2902 for (i
= 0; i
< nready
; i
++)
2903 if (INSN_CODE (ready
[i
]) != -1)
2906 switch (get_attr_type (insn
))
2931 case TYPE_IPREFETCH
:
2937 /* In the first scheduling phase, schedule loads and stores together
2938 to increase the chance they will get merged during postreload CSE. */
2939 if (!reload_completed
&& pipe_ls
>= 0)
2941 insn
= ready
[pipe_ls
];
2942 ready
[pipe_ls
] = ready
[nready
- 1];
2943 ready
[nready
- 1] = insn
;
2947 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2951 /* When we have loads/stores in every cycle of the last 15 insns and
2952 we are about to schedule another load/store, emit an hbrp insn
2955 && spu_sched_length
- spu_ls_first
>= 4 * 15
2956 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
2958 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2959 recog_memoized (insn
);
2960 if (pipe0_clock
< clock
)
2961 PUT_MODE (insn
, TImode
);
2962 spu_sched_variable_issue (file
, verbose
, insn
, -1);
2966 /* In general, we want to emit nops to increase dual issue, but dual
2967 issue isn't faster when one of the insns could be scheduled later
2968 without effecting the critical path. We look at INSN_PRIORITY to
2969 make a good guess, but it isn't perfect so -mdual-nops=n can be
2970 used to effect it. */
2971 if (in_spu_reorg
&& spu_dual_nops
< 10)
2973 /* When we are at an even address and we are not issuing nops to
2974 improve scheduling then we need to advance the cycle. */
2975 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
2976 && (spu_dual_nops
== 0
2979 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
2982 /* When at an odd address, schedule the highest priority insn
2983 without considering pipeline. */
2984 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
2985 && (spu_dual_nops
== 0
2987 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
2992 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2993 pipe0 insn in the ready list, schedule it. */
2994 if (pipe0_clock
< clock
&& pipe_0
>= 0)
2995 schedule_i
= pipe_0
;
2997 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2998 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3000 schedule_i
= pipe_1
;
3002 if (schedule_i
> -1)
3004 insn
= ready
[schedule_i
];
3005 ready
[schedule_i
] = ready
[nready
- 1];
3006 ready
[nready
- 1] = insn
;
3012 /* INSN is dependent on DEP_INSN. */
3014 spu_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep_insn
, int cost
)
3018 /* The blockage pattern is used to prevent instructions from being
3019 moved across it and has no cost. */
3020 if (INSN_CODE (insn
) == CODE_FOR_blockage
3021 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3024 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
3025 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3028 /* Make sure hbrps are spread out. */
3029 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3030 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3033 /* Make sure hints and hbrps are 2 cycles apart. */
3034 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3035 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3036 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3037 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3040 /* An hbrp has no real dependency on other insns. */
3041 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3042 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3045 /* Assuming that it is unlikely an argument register will be used in
3046 the first cycle of the called function, we reduce the cost for
3047 slightly better scheduling of dep_insn. When not hinted, the
3048 mispredicted branch would hide the cost as well. */
3051 rtx target
= get_branch_target (insn
);
3052 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3057 /* And when returning from a function, let's assume the return values
3058 are completed sooner too. */
3059 if (CALL_P (dep_insn
))
3062 /* Make sure an instruction that loads from the back chain is schedule
3063 away from the return instruction so a hint is more likely to get
3065 if (INSN_CODE (insn
) == CODE_FOR__return
3066 && (set
= single_set (dep_insn
))
3067 && GET_CODE (SET_DEST (set
)) == REG
3068 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3071 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3072 scheduler makes every insn in a block anti-dependent on the final
3073 jump_insn. We adjust here so higher cost insns will get scheduled
3075 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3076 return insn_cost (dep_insn
) - 3;
3081 /* Create a CONST_DOUBLE from a string. */
3083 spu_float_const (const char *string
, machine_mode mode
)
3085 REAL_VALUE_TYPE value
;
3086 value
= REAL_VALUE_ATOF (string
, mode
);
3087 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
3091 spu_constant_address_p (rtx x
)
3093 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
3094 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
3095 || GET_CODE (x
) == HIGH
);
3098 static enum spu_immediate
3099 which_immediate_load (HOST_WIDE_INT val
)
3101 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3103 if (val
>= -0x8000 && val
<= 0x7fff)
3105 if (val
>= 0 && val
<= 0x3ffff)
3107 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3109 if ((val
& 0xffff) == 0)
3115 /* Return true when OP can be loaded by one of the il instructions, or
3116 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3118 immediate_load_p (rtx op
, machine_mode mode
)
3120 if (CONSTANT_P (op
))
3122 enum immediate_class c
= classify_immediate (op
, mode
);
3123 return c
== IC_IL1
|| c
== IC_IL1s
3124 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
3129 /* Return true if the first SIZE bytes of arr is a constant that can be
3130 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3131 represent the size and offset of the instruction to use. */
3133 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3135 int cpat
, run
, i
, start
;
3139 for (i
= 0; i
< size
&& cpat
; i
++)
3147 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3149 else if (arr
[i
] == 0)
3151 while (arr
[i
+run
] == run
&& i
+run
< 16)
3153 if (run
!= 4 && run
!= 8)
3158 if ((i
& (run
-1)) != 0)
3165 if (cpat
&& (run
|| size
< 16))
3172 *pstart
= start
== -1 ? 16-run
: start
;
3178 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3179 it into a register. MODE is only valid when OP is a CONST_INT. */
3180 static enum immediate_class
3181 classify_immediate (rtx op
, machine_mode mode
)
3184 unsigned char arr
[16];
3185 int i
, j
, repeated
, fsmbi
, repeat
;
3187 gcc_assert (CONSTANT_P (op
));
3189 if (GET_MODE (op
) != VOIDmode
)
3190 mode
= GET_MODE (op
);
3192 /* A V4SI const_vector with all identical symbols is ok. */
3195 && GET_CODE (op
) == CONST_VECTOR
3196 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3197 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3198 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3199 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3200 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3201 op
= CONST_VECTOR_ELT (op
, 0);
3203 switch (GET_CODE (op
))
3207 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3210 /* We can never know if the resulting address fits in 18 bits and can be
3211 loaded with ila. For now, assume the address will not overflow if
3212 the displacement is "small" (fits 'K' constraint). */
3213 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3215 rtx sym
= XEXP (XEXP (op
, 0), 0);
3216 rtx cst
= XEXP (XEXP (op
, 0), 1);
3218 if (GET_CODE (sym
) == SYMBOL_REF
3219 && GET_CODE (cst
) == CONST_INT
3220 && satisfies_constraint_K (cst
))
3229 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3230 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3231 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3237 constant_to_array (mode
, op
, arr
);
3239 /* Check that each 4-byte slot is identical. */
3241 for (i
= 4; i
< 16; i
+= 4)
3242 for (j
= 0; j
< 4; j
++)
3243 if (arr
[j
] != arr
[i
+ j
])
3248 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3249 val
= trunc_int_for_mode (val
, SImode
);
3251 if (which_immediate_load (val
) != SPU_NONE
)
3255 /* Any mode of 2 bytes or smaller can be loaded with an il
3257 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3261 for (i
= 0; i
< 16 && fsmbi
; i
++)
3262 if (arr
[i
] != 0 && repeat
== 0)
3264 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3267 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3269 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
3282 static enum spu_immediate
3283 which_logical_immediate (HOST_WIDE_INT val
)
3285 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3287 if (val
>= -0x200 && val
<= 0x1ff)
3289 if (val
>= 0 && val
<= 0xffff)
3291 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3293 val
= trunc_int_for_mode (val
, HImode
);
3294 if (val
>= -0x200 && val
<= 0x1ff)
3296 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3298 val
= trunc_int_for_mode (val
, QImode
);
3299 if (val
>= -0x200 && val
<= 0x1ff)
3306 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3309 const_vector_immediate_p (rtx x
)
3312 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
3313 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
3314 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
3315 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
3321 logical_immediate_p (rtx op
, machine_mode mode
)
3324 unsigned char arr
[16];
3327 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3328 || GET_CODE (op
) == CONST_VECTOR
);
3330 if (GET_CODE (op
) == CONST_VECTOR
3331 && !const_vector_immediate_p (op
))
3334 if (GET_MODE (op
) != VOIDmode
)
3335 mode
= GET_MODE (op
);
3337 constant_to_array (mode
, op
, arr
);
3339 /* Check that bytes are repeated. */
3340 for (i
= 4; i
< 16; i
+= 4)
3341 for (j
= 0; j
< 4; j
++)
3342 if (arr
[j
] != arr
[i
+ j
])
3345 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3346 val
= trunc_int_for_mode (val
, SImode
);
3348 i
= which_logical_immediate (val
);
3349 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
3353 iohl_immediate_p (rtx op
, machine_mode mode
)
3356 unsigned char arr
[16];
3359 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3360 || GET_CODE (op
) == CONST_VECTOR
);
3362 if (GET_CODE (op
) == CONST_VECTOR
3363 && !const_vector_immediate_p (op
))
3366 if (GET_MODE (op
) != VOIDmode
)
3367 mode
= GET_MODE (op
);
3369 constant_to_array (mode
, op
, arr
);
3371 /* Check that bytes are repeated. */
3372 for (i
= 4; i
< 16; i
+= 4)
3373 for (j
= 0; j
< 4; j
++)
3374 if (arr
[j
] != arr
[i
+ j
])
3377 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3378 val
= trunc_int_for_mode (val
, SImode
);
3380 return val
>= 0 && val
<= 0xffff;
3384 arith_immediate_p (rtx op
, machine_mode mode
,
3385 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3388 unsigned char arr
[16];
3391 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3392 || GET_CODE (op
) == CONST_VECTOR
);
3394 if (GET_CODE (op
) == CONST_VECTOR
3395 && !const_vector_immediate_p (op
))
3398 if (GET_MODE (op
) != VOIDmode
)
3399 mode
= GET_MODE (op
);
3401 constant_to_array (mode
, op
, arr
);
3403 if (VECTOR_MODE_P (mode
))
3404 mode
= GET_MODE_INNER (mode
);
3406 bytes
= GET_MODE_SIZE (mode
);
3407 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3409 /* Check that bytes are repeated. */
3410 for (i
= bytes
; i
< 16; i
+= bytes
)
3411 for (j
= 0; j
< bytes
; j
++)
3412 if (arr
[j
] != arr
[i
+ j
])
3416 for (j
= 1; j
< bytes
; j
++)
3417 val
= (val
<< 8) | arr
[j
];
3419 val
= trunc_int_for_mode (val
, mode
);
3421 return val
>= low
&& val
<= high
;
3424 /* TRUE when op is an immediate and an exact power of 2, and given that
3425 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3426 all entries must be the same. */
3428 exp2_immediate_p (rtx op
, machine_mode mode
, int low
, int high
)
3430 machine_mode int_mode
;
3432 unsigned char arr
[16];
3435 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3436 || GET_CODE (op
) == CONST_VECTOR
);
3438 if (GET_CODE (op
) == CONST_VECTOR
3439 && !const_vector_immediate_p (op
))
3442 if (GET_MODE (op
) != VOIDmode
)
3443 mode
= GET_MODE (op
);
3445 constant_to_array (mode
, op
, arr
);
3447 if (VECTOR_MODE_P (mode
))
3448 mode
= GET_MODE_INNER (mode
);
3450 bytes
= GET_MODE_SIZE (mode
);
3451 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3453 /* Check that bytes are repeated. */
3454 for (i
= bytes
; i
< 16; i
+= bytes
)
3455 for (j
= 0; j
< bytes
; j
++)
3456 if (arr
[j
] != arr
[i
+ j
])
3460 for (j
= 1; j
< bytes
; j
++)
3461 val
= (val
<< 8) | arr
[j
];
3463 val
= trunc_int_for_mode (val
, int_mode
);
3465 /* Currently, we only handle SFmode */
3466 gcc_assert (mode
== SFmode
);
3469 int exp
= (val
>> 23) - 127;
3470 return val
> 0 && (val
& 0x007fffff) == 0
3471 && exp
>= low
&& exp
<= high
;
3476 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3479 ea_symbol_ref_p (const_rtx x
)
3483 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
3485 rtx plus
= XEXP (x
, 0);
3486 rtx op0
= XEXP (plus
, 0);
3487 rtx op1
= XEXP (plus
, 1);
3488 if (GET_CODE (op1
) == CONST_INT
)
3492 return (GET_CODE (x
) == SYMBOL_REF
3493 && (decl
= SYMBOL_REF_DECL (x
)) != 0
3494 && TREE_CODE (decl
) == VAR_DECL
3495 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)));
3499 - any 32-bit constant (SImode, SFmode)
3500 - any constant that can be generated with fsmbi (any mode)
3501 - a 64-bit constant where the high and low bits are identical
3503 - a 128-bit constant where the four 32-bit words match. */
3505 spu_legitimate_constant_p (machine_mode mode
, rtx x
)
3507 subrtx_iterator::array_type array
;
3508 if (GET_CODE (x
) == HIGH
)
3511 /* Reject any __ea qualified reference. These can't appear in
3512 instructions but must be forced to the constant pool. */
3513 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3514 if (ea_symbol_ref_p (*iter
))
3517 /* V4SI with all identical symbols is valid. */
3520 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3521 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3522 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3523 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3524 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3525 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3527 if (GET_CODE (x
) == CONST_VECTOR
3528 && !const_vector_immediate_p (x
))
3533 /* Valid address are:
3534 - symbol_ref, label_ref, const
3536 - reg + const_int, where const_int is 16 byte aligned
3537 - reg + reg, alignment doesn't matter
3538 The alignment matters in the reg+const case because lqd and stqd
3539 ignore the 4 least significant bits of the const. We only care about
3540 16 byte modes because the expand phase will change all smaller MEM
3541 references to TImode. */
3543 spu_legitimate_address_p (machine_mode mode
,
3544 rtx x
, bool reg_ok_strict
)
3546 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3548 && GET_CODE (x
) == AND
3549 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3550 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3552 switch (GET_CODE (x
))
3555 return !TARGET_LARGE_MEM
;
3559 /* Keep __ea references until reload so that spu_expand_mov can see them
3561 if (ea_symbol_ref_p (x
))
3562 return !reload_in_progress
&& !reload_completed
;
3563 return !TARGET_LARGE_MEM
;
3566 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3574 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3579 rtx op0
= XEXP (x
, 0);
3580 rtx op1
= XEXP (x
, 1);
3581 if (GET_CODE (op0
) == SUBREG
)
3582 op0
= XEXP (op0
, 0);
3583 if (GET_CODE (op1
) == SUBREG
)
3584 op1
= XEXP (op1
, 0);
3585 if (GET_CODE (op0
) == REG
3586 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3587 && GET_CODE (op1
) == CONST_INT
3588 && ((INTVAL (op1
) >= -0x2000 && INTVAL (op1
) <= 0x1fff)
3589 /* If virtual registers are involved, the displacement will
3590 change later on anyway, so checking would be premature.
3591 Reload will make sure the final displacement after
3592 register elimination is OK. */
3593 || op0
== arg_pointer_rtx
3594 || op0
== frame_pointer_rtx
3595 || op0
== virtual_stack_vars_rtx
)
3596 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3598 if (GET_CODE (op0
) == REG
3599 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3600 && GET_CODE (op1
) == REG
3601 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
3612 /* Like spu_legitimate_address_p, except with named addresses. */
3614 spu_addr_space_legitimate_address_p (machine_mode mode
, rtx x
,
3615 bool reg_ok_strict
, addr_space_t as
)
3617 if (as
== ADDR_SPACE_EA
)
3618 return (REG_P (x
) && (GET_MODE (x
) == EAmode
));
3620 else if (as
!= ADDR_SPACE_GENERIC
)
3623 return spu_legitimate_address_p (mode
, x
, reg_ok_strict
);
3626 /* When the address is reg + const_int, force the const_int into a
3629 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3630 machine_mode mode ATTRIBUTE_UNUSED
)
3633 /* Make sure both operands are registers. */
3634 if (GET_CODE (x
) == PLUS
)
3638 if (ALIGNED_SYMBOL_REF_P (op0
))
3640 op0
= force_reg (Pmode
, op0
);
3641 mark_reg_pointer (op0
, 128);
3643 else if (GET_CODE (op0
) != REG
)
3644 op0
= force_reg (Pmode
, op0
);
3645 if (ALIGNED_SYMBOL_REF_P (op1
))
3647 op1
= force_reg (Pmode
, op1
);
3648 mark_reg_pointer (op1
, 128);
3650 else if (GET_CODE (op1
) != REG
)
3651 op1
= force_reg (Pmode
, op1
);
3652 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3657 /* Like spu_legitimate_address, except with named address support. */
3659 spu_addr_space_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
,
3662 if (as
!= ADDR_SPACE_GENERIC
)
3665 return spu_legitimize_address (x
, oldx
, mode
);
3668 /* Reload reg + const_int for out-of-range displacements. */
3670 spu_legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
3671 int opnum
, int type
)
3673 bool removed_and
= false;
3675 if (GET_CODE (ad
) == AND
3676 && CONST_INT_P (XEXP (ad
, 1))
3677 && INTVAL (XEXP (ad
, 1)) == (HOST_WIDE_INT
) - 16)
3683 if (GET_CODE (ad
) == PLUS
3684 && REG_P (XEXP (ad
, 0))
3685 && CONST_INT_P (XEXP (ad
, 1))
3686 && !(INTVAL (XEXP (ad
, 1)) >= -0x2000
3687 && INTVAL (XEXP (ad
, 1)) <= 0x1fff))
3689 /* Unshare the sum. */
3692 /* Reload the displacement. */
3693 push_reload (XEXP (ad
, 1), NULL_RTX
, &XEXP (ad
, 1), NULL
,
3694 BASE_REG_CLASS
, GET_MODE (ad
), VOIDmode
, 0, 0,
3695 opnum
, (enum reload_type
) type
);
3697 /* Add back AND for alignment if we stripped it. */
3699 ad
= gen_rtx_AND (GET_MODE (ad
), ad
, GEN_INT (-16));
3707 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3708 struct attribute_spec.handler. */
3710 spu_handle_fndecl_attribute (tree
* node
,
3712 tree args ATTRIBUTE_UNUSED
,
3713 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3715 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3717 warning (0, "%qE attribute only applies to functions",
3719 *no_add_attrs
= true;
3725 /* Handle the "vector" attribute. */
3727 spu_handle_vector_attribute (tree
* node
, tree name
,
3728 tree args ATTRIBUTE_UNUSED
,
3729 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3731 tree type
= *node
, result
= NULL_TREE
;
3735 while (POINTER_TYPE_P (type
)
3736 || TREE_CODE (type
) == FUNCTION_TYPE
3737 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3738 type
= TREE_TYPE (type
);
3740 mode
= TYPE_MODE (type
);
3742 unsigned_p
= TYPE_UNSIGNED (type
);
3746 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3749 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3752 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3755 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3758 result
= V4SF_type_node
;
3761 result
= V2DF_type_node
;
3767 /* Propagate qualifiers attached to the element type
3768 onto the vector type. */
3769 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3770 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3772 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3775 warning (0, "%qE attribute ignored", name
);
3777 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
3782 /* Return nonzero if FUNC is a naked function. */
3784 spu_naked_function_p (tree func
)
3788 if (TREE_CODE (func
) != FUNCTION_DECL
)
3791 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3792 return a
!= NULL_TREE
;
3796 spu_initial_elimination_offset (int from
, int to
)
3798 int saved_regs_size
= spu_saved_regs_size ();
3800 if (!crtl
->is_leaf
|| crtl
->outgoing_args_size
3801 || get_frame_size () || saved_regs_size
)
3802 sp_offset
= STACK_POINTER_OFFSET
;
3803 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3804 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
3805 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3806 return get_frame_size ();
3807 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3808 return sp_offset
+ crtl
->outgoing_args_size
3809 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3810 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3811 return get_frame_size () + saved_regs_size
+ sp_offset
;
3817 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3819 machine_mode mode
= TYPE_MODE (type
);
3820 int byte_size
= ((mode
== BLKmode
)
3821 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3823 /* Make sure small structs are left justified in a register. */
3824 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3825 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3830 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3831 int n
= byte_size
/ UNITS_PER_WORD
;
3832 v
= rtvec_alloc (nregs
);
3833 for (i
= 0; i
< n
; i
++)
3835 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3836 gen_rtx_REG (TImode
,
3839 GEN_INT (UNITS_PER_WORD
* i
));
3840 byte_size
-= UNITS_PER_WORD
;
3848 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3850 gen_rtx_EXPR_LIST (VOIDmode
,
3851 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3852 GEN_INT (UNITS_PER_WORD
* n
));
3854 return gen_rtx_PARALLEL (mode
, v
);
3856 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3860 spu_function_arg (cumulative_args_t cum_v
,
3862 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3864 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3867 if (*cum
>= MAX_REGISTER_ARGS
)
3870 byte_size
= ((mode
== BLKmode
)
3871 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3873 /* The ABI does not allow parameters to be passed partially in
3874 reg and partially in stack. */
3875 if ((*cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3878 /* Make sure small structs are left justified in a register. */
3879 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3880 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3886 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3887 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3888 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ *cum
),
3890 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3893 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ *cum
);
3897 spu_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
3898 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3900 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3902 *cum
+= (type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
3905 ? ((int_size_in_bytes (type
) + 15) / 16)
3908 : HARD_REGNO_NREGS (cum
, mode
));
3911 /* Variable sized types are passed by reference. */
3913 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
3914 machine_mode mode ATTRIBUTE_UNUSED
,
3915 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3917 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3923 /* Create and return the va_list datatype.
3925 On SPU, va_list is an array type equivalent to
3927 typedef struct __va_list_tag
3929 void *__args __attribute__((__aligned(16)));
3930 void *__skip __attribute__((__aligned(16)));
3934 where __args points to the arg that will be returned by the next
3935 va_arg(), and __skip points to the previous stack frame such that
3936 when __args == __skip we should advance __args by 32 bytes. */
3938 spu_build_builtin_va_list (void)
3940 tree f_args
, f_skip
, record
, type_decl
;
3943 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3946 build_decl (BUILTINS_LOCATION
,
3947 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3949 f_args
= build_decl (BUILTINS_LOCATION
,
3950 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3951 f_skip
= build_decl (BUILTINS_LOCATION
,
3952 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3954 DECL_FIELD_CONTEXT (f_args
) = record
;
3955 DECL_ALIGN (f_args
) = 128;
3956 DECL_USER_ALIGN (f_args
) = 1;
3958 DECL_FIELD_CONTEXT (f_skip
) = record
;
3959 DECL_ALIGN (f_skip
) = 128;
3960 DECL_USER_ALIGN (f_skip
) = 1;
3962 TYPE_STUB_DECL (record
) = type_decl
;
3963 TYPE_NAME (record
) = type_decl
;
3964 TYPE_FIELDS (record
) = f_args
;
3965 DECL_CHAIN (f_args
) = f_skip
;
3967 /* We know this is being padded and we want it too. It is an internal
3968 type so hide the warnings from the user. */
3970 warn_padded
= false;
3972 layout_type (record
);
3976 /* The correct type is an array type of one element. */
3977 return build_array_type (record
, build_index_type (size_zero_node
));
3980 /* Implement va_start by filling the va_list structure VALIST.
3981 NEXTARG points to the first anonymous stack argument.
3983 The following global variables are used to initialize
3984 the va_list structure:
3987 the CUMULATIVE_ARGS for this function
3989 crtl->args.arg_offset_rtx:
3990 holds the offset of the first anonymous stack argument
3991 (relative to the virtual arg pointer). */
3994 spu_va_start (tree valist
, rtx nextarg
)
3996 tree f_args
, f_skip
;
3999 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4000 f_skip
= DECL_CHAIN (f_args
);
4002 valist
= build_simple_mem_ref (valist
);
4004 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4006 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4008 /* Find the __args area. */
4009 t
= make_tree (TREE_TYPE (args
), nextarg
);
4010 if (crtl
->args
.pretend_args_size
> 0)
4011 t
= fold_build_pointer_plus_hwi (t
, -STACK_POINTER_OFFSET
);
4012 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
4013 TREE_SIDE_EFFECTS (t
) = 1;
4014 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4016 /* Find the __skip area. */
4017 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4018 t
= fold_build_pointer_plus_hwi (t
, (crtl
->args
.pretend_args_size
4019 - STACK_POINTER_OFFSET
));
4020 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
4021 TREE_SIDE_EFFECTS (t
) = 1;
4022 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4025 /* Gimplify va_arg by updating the va_list structure
4026 VALIST as required to retrieve an argument of type
4027 TYPE, and returning that argument.
4029 ret = va_arg(VALIST, TYPE);
4031 generates code equivalent to:
4033 paddedsize = (sizeof(TYPE) + 15) & -16;
4034 if (VALIST.__args + paddedsize > VALIST.__skip
4035 && VALIST.__args <= VALIST.__skip)
4036 addr = VALIST.__skip + 32;
4038 addr = VALIST.__args;
4039 VALIST.__args = addr + paddedsize;
4040 ret = *(TYPE *)addr;
4043 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4044 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4046 tree f_args
, f_skip
;
4048 HOST_WIDE_INT size
, rsize
;
4050 bool pass_by_reference_p
;
4052 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4053 f_skip
= DECL_CHAIN (f_args
);
4055 valist
= build_simple_mem_ref (valist
);
4057 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4059 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4061 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4063 /* if an object is dynamically sized, a pointer to it is passed
4064 instead of the object itself. */
4065 pass_by_reference_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4067 if (pass_by_reference_p
)
4068 type
= build_pointer_type (type
);
4069 size
= int_size_in_bytes (type
);
4070 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4072 /* build conditional expression to calculate addr. The expression
4073 will be gimplified later. */
4074 tmp
= fold_build_pointer_plus_hwi (unshare_expr (args
), rsize
);
4075 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4076 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4077 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4078 unshare_expr (skip
)));
4080 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4081 fold_build_pointer_plus_hwi (unshare_expr (skip
), 32),
4082 unshare_expr (args
));
4084 gimplify_assign (addr
, tmp
, pre_p
);
4086 /* update VALIST.__args */
4087 tmp
= fold_build_pointer_plus_hwi (addr
, rsize
);
4088 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4090 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
4093 if (pass_by_reference_p
)
4094 addr
= build_va_arg_indirect_ref (addr
);
4096 return build_va_arg_indirect_ref (addr
);
4099 /* Save parameter registers starting with the register that corresponds
4100 to the first unnamed parameters. If the first unnamed parameter is
4101 in the stack then save no registers. Set pretend_args_size to the
4102 amount of space needed to save the registers. */
4104 spu_setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
4105 tree type
, int *pretend_size
, int no_rtl
)
4112 int ncum
= *get_cumulative_args (cum
);
4114 /* cum currently points to the last named argument, we want to
4115 start at the next argument. */
4116 spu_function_arg_advance (pack_cumulative_args (&ncum
), mode
, type
, true);
4118 offset
= -STACK_POINTER_OFFSET
;
4119 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4121 tmp
= gen_frame_mem (V4SImode
,
4122 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4124 emit_move_insn (tmp
,
4125 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4128 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
4133 spu_conditional_register_usage (void)
4137 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4138 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4142 /* This is called any time we inspect the alignment of a register for
4145 reg_aligned_for_addr (rtx x
)
4148 REGNO (x
) < FIRST_PSEUDO_REGISTER
? ORIGINAL_REGNO (x
) : REGNO (x
);
4149 return REGNO_POINTER_ALIGN (regno
) >= 128;
4152 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4153 into its SYMBOL_REF_FLAGS. */
4155 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
4157 default_encode_section_info (decl
, rtl
, first
);
4159 /* If a variable has a forced alignment to < 16 bytes, mark it with
4160 SYMBOL_FLAG_ALIGN1. */
4161 if (TREE_CODE (decl
) == VAR_DECL
4162 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4163 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4166 /* Return TRUE if we are certain the mem refers to a complete object
4167 which is both 16-byte aligned and padded to a 16-byte boundary. This
4168 would make it safe to store with a single instruction.
4169 We guarantee the alignment and padding for static objects by aligning
4170 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4171 FIXME: We currently cannot guarantee this for objects on the stack
4172 because assign_parm_setup_stack calls assign_stack_local with the
4173 alignment of the parameter mode and in that case the alignment never
4174 gets adjusted by LOCAL_ALIGNMENT. */
4176 store_with_one_insn_p (rtx mem
)
4178 machine_mode mode
= GET_MODE (mem
);
4179 rtx addr
= XEXP (mem
, 0);
4180 if (mode
== BLKmode
)
4182 if (GET_MODE_SIZE (mode
) >= 16)
4184 /* Only static objects. */
4185 if (GET_CODE (addr
) == SYMBOL_REF
)
4187 /* We use the associated declaration to make sure the access is
4188 referring to the whole object.
4189 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4190 if it is necessary. Will there be cases where one exists, and
4191 the other does not? Will there be cases where both exist, but
4192 have different types? */
4193 tree decl
= MEM_EXPR (mem
);
4195 && TREE_CODE (decl
) == VAR_DECL
4196 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4198 decl
= SYMBOL_REF_DECL (addr
);
4200 && TREE_CODE (decl
) == VAR_DECL
4201 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4207 /* Return 1 when the address is not valid for a simple load and store as
4208 required by the '_mov*' patterns. We could make this less strict
4209 for loads, but we prefer mem's to look the same so they are more
4210 likely to be merged. */
4212 address_needs_split (rtx mem
)
4214 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4215 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4216 || !(store_with_one_insn_p (mem
)
4217 || mem_is_padded_component_ref (mem
))))
4223 static GTY(()) rtx cache_fetch
; /* __cache_fetch function */
4224 static GTY(()) rtx cache_fetch_dirty
; /* __cache_fetch_dirty function */
4225 static alias_set_type ea_alias_set
= -1; /* alias set for __ea memory */
4227 /* MEM is known to be an __ea qualified memory access. Emit a call to
4228 fetch the ppu memory to local store, and return its address in local
4232 ea_load_store (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4236 rtx ndirty
= GEN_INT (GET_MODE_SIZE (GET_MODE (mem
)));
4237 if (!cache_fetch_dirty
)
4238 cache_fetch_dirty
= init_one_libfunc ("__cache_fetch_dirty");
4239 emit_library_call_value (cache_fetch_dirty
, data_addr
, LCT_NORMAL
, Pmode
,
4240 2, ea_addr
, EAmode
, ndirty
, SImode
);
4245 cache_fetch
= init_one_libfunc ("__cache_fetch");
4246 emit_library_call_value (cache_fetch
, data_addr
, LCT_NORMAL
, Pmode
,
4247 1, ea_addr
, EAmode
);
4251 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4252 dirty bit marking, inline.
4254 The cache control data structure is an array of
4256 struct __cache_tag_array
4258 unsigned int tag_lo[4];
4259 unsigned int tag_hi[4];
4260 void *data_pointer[4];
4262 vector unsigned short dirty_bits[4];
4266 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4270 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4271 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4272 rtx index_mask
= gen_reg_rtx (SImode
);
4273 rtx tag_arr
= gen_reg_rtx (Pmode
);
4274 rtx splat_mask
= gen_reg_rtx (TImode
);
4275 rtx splat
= gen_reg_rtx (V4SImode
);
4276 rtx splat_hi
= NULL_RTX
;
4277 rtx tag_index
= gen_reg_rtx (Pmode
);
4278 rtx block_off
= gen_reg_rtx (SImode
);
4279 rtx tag_addr
= gen_reg_rtx (Pmode
);
4280 rtx tag
= gen_reg_rtx (V4SImode
);
4281 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4282 rtx cache_tag_hi
= NULL_RTX
;
4283 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4284 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4285 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4286 rtx tag_equal_hi
= NULL_RTX
;
4287 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4288 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4289 rtx eq_index
= gen_reg_rtx (SImode
);
4290 rtx bcomp
, hit_label
, hit_ref
, cont_label
;
4293 if (spu_ea_model
!= 32)
4295 splat_hi
= gen_reg_rtx (V4SImode
);
4296 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4297 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4300 emit_move_insn (index_mask
, plus_constant (Pmode
, tag_size_sym
, -128));
4301 emit_move_insn (tag_arr
, tag_arr_sym
);
4302 v
= 0x0001020300010203LL
;
4303 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4304 ea_addr_si
= ea_addr
;
4305 if (spu_ea_model
!= 32)
4306 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4308 /* tag_index = ea_addr & (tag_array_size - 128) */
4309 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4311 /* splat ea_addr to all 4 slots. */
4312 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4313 /* Similarly for high 32 bits of ea_addr. */
4314 if (spu_ea_model
!= 32)
4315 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4317 /* block_off = ea_addr & 127 */
4318 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4320 /* tag_addr = tag_arr + tag_index */
4321 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4323 /* Read cache tags. */
4324 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4325 if (spu_ea_model
!= 32)
4326 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4327 plus_constant (Pmode
,
4330 /* tag = ea_addr & -128 */
4331 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4333 /* Read all four cache data pointers. */
4334 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4335 plus_constant (Pmode
,
4339 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4340 if (spu_ea_model
!= 32)
4342 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4343 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4346 /* At most one of the tags compare equal, so tag_equal has one
4347 32-bit slot set to all 1's, with the other slots all zero.
4348 gbb picks off low bit from each byte in the 128-bit registers,
4349 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4351 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4352 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4354 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4355 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4357 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4358 (rotating eq_index mod 16 bytes). */
4359 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4360 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4362 /* Add block offset to form final data address. */
4363 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4365 /* Check that we did hit. */
4366 hit_label
= gen_label_rtx ();
4367 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4368 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4369 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4370 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4372 /* Say that this branch is very likely to happen. */
4373 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4374 add_int_reg_note (insn
, REG_BR_PROB
, v
);
4376 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4377 cont_label
= gen_label_rtx ();
4378 emit_jump_insn (gen_jump (cont_label
));
4381 emit_label (hit_label
);
4386 rtx dirty_bits
= gen_reg_rtx (TImode
);
4387 rtx dirty_off
= gen_reg_rtx (SImode
);
4388 rtx dirty_128
= gen_reg_rtx (TImode
);
4389 rtx neg_block_off
= gen_reg_rtx (SImode
);
4391 /* Set up mask with one dirty bit per byte of the mem we are
4392 writing, starting from top bit. */
4394 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4395 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4400 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4402 /* Form index into cache dirty_bits. eq_index is one of
4403 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4404 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4405 offset to each of the four dirty_bits elements. */
4406 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4408 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4410 /* Rotate bit mask to proper bit. */
4411 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4412 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4413 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4415 /* Or in the new dirty bits. */
4416 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4419 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4422 emit_label (cont_label
);
4426 expand_ea_mem (rtx mem
, bool is_store
)
4429 rtx data_addr
= gen_reg_rtx (Pmode
);
4432 ea_addr
= force_reg (EAmode
, XEXP (mem
, 0));
4433 if (optimize_size
|| optimize
== 0)
4434 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4436 ea_load_store_inline (mem
, is_store
, ea_addr
, data_addr
);
4438 if (ea_alias_set
== -1)
4439 ea_alias_set
= new_alias_set ();
4441 /* We generate a new MEM RTX to refer to the copy of the data
4442 in the cache. We do not copy memory attributes (except the
4443 alignment) from the original MEM, as they may no longer apply
4444 to the cache copy. */
4445 new_mem
= gen_rtx_MEM (GET_MODE (mem
), data_addr
);
4446 set_mem_alias_set (new_mem
, ea_alias_set
);
4447 set_mem_align (new_mem
, MIN (MEM_ALIGN (mem
), 128 * 8));
4453 spu_expand_mov (rtx
* ops
, machine_mode mode
)
4455 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4457 /* Perform the move in the destination SUBREG's inner mode. */
4458 ops
[0] = SUBREG_REG (ops
[0]);
4459 mode
= GET_MODE (ops
[0]);
4460 ops
[1] = gen_lowpart_common (mode
, ops
[1]);
4461 gcc_assert (ops
[1]);
4464 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4466 rtx from
= SUBREG_REG (ops
[1]);
4467 machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4469 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4470 && GET_MODE_CLASS (imode
) == MODE_INT
4471 && subreg_lowpart_p (ops
[1]));
4473 if (GET_MODE_SIZE (imode
) < 4)
4475 if (imode
!= GET_MODE (from
))
4476 from
= gen_rtx_SUBREG (imode
, from
, 0);
4478 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4480 enum insn_code icode
= convert_optab_handler (trunc_optab
,
4482 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4485 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4489 /* At least one of the operands needs to be a register. */
4490 if ((reload_in_progress
| reload_completed
) == 0
4491 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4493 rtx temp
= force_reg (mode
, ops
[1]);
4494 emit_move_insn (ops
[0], temp
);
4497 if (reload_in_progress
|| reload_completed
)
4499 if (CONSTANT_P (ops
[1]))
4500 return spu_split_immediate (ops
);
4504 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4506 if (GET_CODE (ops
[1]) == CONST_INT
)
4508 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4509 if (val
!= INTVAL (ops
[1]))
4511 emit_move_insn (ops
[0], GEN_INT (val
));
4517 if (MEM_ADDR_SPACE (ops
[0]))
4518 ops
[0] = expand_ea_mem (ops
[0], true);
4519 return spu_split_store (ops
);
4523 if (MEM_ADDR_SPACE (ops
[1]))
4524 ops
[1] = expand_ea_mem (ops
[1], false);
4525 return spu_split_load (ops
);
4532 spu_convert_move (rtx dst
, rtx src
)
4534 machine_mode mode
= GET_MODE (dst
);
4535 machine_mode int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
4537 gcc_assert (GET_MODE (src
) == TImode
);
4538 reg
= int_mode
!= mode
? gen_reg_rtx (int_mode
) : dst
;
4539 emit_insn (gen_rtx_SET (VOIDmode
, reg
,
4540 gen_rtx_TRUNCATE (int_mode
,
4541 gen_rtx_LSHIFTRT (TImode
, src
,
4542 GEN_INT (int_mode
== DImode
? 64 : 96)))));
4543 if (int_mode
!= mode
)
4545 reg
= simplify_gen_subreg (mode
, reg
, int_mode
, 0);
4546 emit_move_insn (dst
, reg
);
4550 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4551 the address from SRC and SRC+16. Return a REG or CONST_INT that
4552 specifies how many bytes to rotate the loaded registers, plus any
4553 extra from EXTRA_ROTQBY. The address and rotate amounts are
4554 normalized to improve merging of loads and rotate computations. */
4556 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4558 rtx addr
= XEXP (src
, 0);
4559 rtx p0
, p1
, rot
, addr0
, addr1
;
4565 if (MEM_ALIGN (src
) >= 128)
4566 /* Address is already aligned; simply perform a TImode load. */ ;
4567 else if (GET_CODE (addr
) == PLUS
)
4570 aligned reg + aligned reg => lqx
4571 aligned reg + unaligned reg => lqx, rotqby
4572 aligned reg + aligned const => lqd
4573 aligned reg + unaligned const => lqd, rotqbyi
4574 unaligned reg + aligned reg => lqx, rotqby
4575 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4576 unaligned reg + aligned const => lqd, rotqby
4577 unaligned reg + unaligned const -> not allowed by legitimate address
4579 p0
= XEXP (addr
, 0);
4580 p1
= XEXP (addr
, 1);
4581 if (!reg_aligned_for_addr (p0
))
4583 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4585 rot
= gen_reg_rtx (SImode
);
4586 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4588 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4592 && INTVAL (p1
) * BITS_PER_UNIT
4593 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4595 rot
= gen_reg_rtx (SImode
);
4596 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4601 rtx x
= gen_reg_rtx (SImode
);
4602 emit_move_insn (x
, p1
);
4603 if (!spu_arith_operand (p1
, SImode
))
4605 rot
= gen_reg_rtx (SImode
);
4606 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4607 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4615 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4617 rot_amt
= INTVAL (p1
) & 15;
4618 if (INTVAL (p1
) & -16)
4620 p1
= GEN_INT (INTVAL (p1
) & -16);
4621 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4626 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4630 else if (REG_P (addr
))
4632 if (!reg_aligned_for_addr (addr
))
4635 else if (GET_CODE (addr
) == CONST
)
4637 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4638 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4639 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4641 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4643 addr
= gen_rtx_CONST (Pmode
,
4644 gen_rtx_PLUS (Pmode
,
4645 XEXP (XEXP (addr
, 0), 0),
4646 GEN_INT (rot_amt
& -16)));
4648 addr
= XEXP (XEXP (addr
, 0), 0);
4652 rot
= gen_reg_rtx (Pmode
);
4653 emit_move_insn (rot
, addr
);
4656 else if (GET_CODE (addr
) == CONST_INT
)
4658 rot_amt
= INTVAL (addr
);
4659 addr
= GEN_INT (rot_amt
& -16);
4661 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4663 rot
= gen_reg_rtx (Pmode
);
4664 emit_move_insn (rot
, addr
);
4667 rot_amt
+= extra_rotby
;
4673 rtx x
= gen_reg_rtx (SImode
);
4674 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4678 if (!rot
&& rot_amt
)
4679 rot
= GEN_INT (rot_amt
);
4681 addr0
= copy_rtx (addr
);
4682 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4683 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4687 addr1
= plus_constant (SImode
, copy_rtx (addr
), 16);
4688 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4689 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
4696 spu_split_load (rtx
* ops
)
4698 machine_mode mode
= GET_MODE (ops
[0]);
4699 rtx addr
, load
, rot
;
4702 if (GET_MODE_SIZE (mode
) >= 16)
4705 addr
= XEXP (ops
[1], 0);
4706 gcc_assert (GET_CODE (addr
) != AND
);
4708 if (!address_needs_split (ops
[1]))
4710 ops
[1] = change_address (ops
[1], TImode
, addr
);
4711 load
= gen_reg_rtx (TImode
);
4712 emit_insn (gen__movti (load
, ops
[1]));
4713 spu_convert_move (ops
[0], load
);
4717 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4719 load
= gen_reg_rtx (TImode
);
4720 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4723 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4725 spu_convert_move (ops
[0], load
);
4730 spu_split_store (rtx
* ops
)
4732 machine_mode mode
= GET_MODE (ops
[0]);
4734 rtx addr
, p0
, p1
, p1_lo
, smem
;
4738 if (GET_MODE_SIZE (mode
) >= 16)
4741 addr
= XEXP (ops
[0], 0);
4742 gcc_assert (GET_CODE (addr
) != AND
);
4744 if (!address_needs_split (ops
[0]))
4746 reg
= gen_reg_rtx (TImode
);
4747 emit_insn (gen_spu_convert (reg
, ops
[1]));
4748 ops
[0] = change_address (ops
[0], TImode
, addr
);
4749 emit_move_insn (ops
[0], reg
);
4753 if (GET_CODE (addr
) == PLUS
)
4756 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4757 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4758 aligned reg + aligned const => lqd, c?d, shuf, stqx
4759 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4760 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4761 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4762 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4763 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4766 p0
= XEXP (addr
, 0);
4767 p1
= p1_lo
= XEXP (addr
, 1);
4768 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4770 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4771 if (reg_aligned_for_addr (p0
))
4773 p1
= GEN_INT (INTVAL (p1
) & -16);
4774 if (p1
== const0_rtx
)
4777 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4781 rtx x
= gen_reg_rtx (SImode
);
4782 emit_move_insn (x
, p1
);
4783 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4787 else if (REG_P (addr
))
4791 p1
= p1_lo
= const0_rtx
;
4796 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4797 p1
= 0; /* aform doesn't use p1 */
4799 if (ALIGNED_SYMBOL_REF_P (addr
))
4801 else if (GET_CODE (addr
) == CONST
4802 && GET_CODE (XEXP (addr
, 0)) == PLUS
4803 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4804 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4806 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4808 addr
= gen_rtx_CONST (Pmode
,
4809 gen_rtx_PLUS (Pmode
,
4810 XEXP (XEXP (addr
, 0), 0),
4811 GEN_INT (v
& -16)));
4813 addr
= XEXP (XEXP (addr
, 0), 0);
4814 p1_lo
= GEN_INT (v
& 15);
4816 else if (GET_CODE (addr
) == CONST_INT
)
4818 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4819 addr
= GEN_INT (INTVAL (addr
) & -16);
4823 p1_lo
= gen_reg_rtx (SImode
);
4824 emit_move_insn (p1_lo
, addr
);
4828 gcc_assert (aform
== 0 || aform
== 1);
4829 reg
= gen_reg_rtx (TImode
);
4831 scalar
= store_with_one_insn_p (ops
[0]);
4834 /* We could copy the flags from the ops[0] MEM to mem here,
4835 We don't because we want this load to be optimized away if
4836 possible, and copying the flags will prevent that in certain
4837 cases, e.g. consider the volatile flag. */
4839 rtx pat
= gen_reg_rtx (TImode
);
4840 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4841 set_mem_alias_set (lmem
, 0);
4842 emit_insn (gen_movti (reg
, lmem
));
4844 if (!p0
|| reg_aligned_for_addr (p0
))
4845 p0
= stack_pointer_rtx
;
4849 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4850 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4854 if (GET_CODE (ops
[1]) == REG
)
4855 emit_insn (gen_spu_convert (reg
, ops
[1]));
4856 else if (GET_CODE (ops
[1]) == SUBREG
)
4857 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
4862 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4863 emit_insn (gen_ashlti3
4864 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
4866 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4867 /* We can't use the previous alias set because the memory has changed
4868 size and can potentially overlap objects of other types. */
4869 set_mem_alias_set (smem
, 0);
4871 emit_insn (gen_movti (smem
, reg
));
4875 /* Return TRUE if X is MEM which is a struct member reference
4876 and the member can safely be loaded and stored with a single
4877 instruction because it is padded. */
4879 mem_is_padded_component_ref (rtx x
)
4881 tree t
= MEM_EXPR (x
);
4883 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
4885 t
= TREE_OPERAND (t
, 1);
4886 if (!t
|| TREE_CODE (t
) != FIELD_DECL
4887 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
4889 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4890 r
= DECL_FIELD_CONTEXT (t
);
4891 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
4893 /* Make sure they are the same mode */
4894 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
4896 /* If there are no following fields then the field alignment assures
4897 the structure is padded to the alignment which means this field is
4899 if (TREE_CHAIN (t
) == 0)
4901 /* If the following field is also aligned then this field will be
4904 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
4909 /* Parse the -mfixed-range= option string. */
4911 fix_range (const char *const_str
)
4914 char *str
, *dash
, *comma
;
4916 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4917 REG2 are either register names or register numbers. The effect
4918 of this option is to mark the registers in the range from REG1 to
4919 REG2 as ``fixed'' so they won't be used by the compiler. */
4921 i
= strlen (const_str
);
4922 str
= (char *) alloca (i
+ 1);
4923 memcpy (str
, const_str
, i
+ 1);
4927 dash
= strchr (str
, '-');
4930 warning (0, "value of -mfixed-range must have form REG1-REG2");
4934 comma
= strchr (dash
+ 1, ',');
4938 first
= decode_reg_name (str
);
4941 warning (0, "unknown register name: %s", str
);
4945 last
= decode_reg_name (dash
+ 1);
4948 warning (0, "unknown register name: %s", dash
+ 1);
4956 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
4960 for (i
= first
; i
<= last
; ++i
)
4961 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4971 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4972 can be generated using the fsmbi instruction. */
4974 fsmbi_const_p (rtx x
)
4978 /* We can always choose TImode for CONST_INT because the high bits
4979 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4980 enum immediate_class c
= classify_immediate (x
, TImode
);
4981 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
4986 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4987 can be generated using the cbd, chd, cwd or cdd instruction. */
4989 cpat_const_p (rtx x
, machine_mode mode
)
4993 enum immediate_class c
= classify_immediate (x
, mode
);
4994 return c
== IC_CPAT
;
5000 gen_cpat_const (rtx
* ops
)
5002 unsigned char dst
[16];
5003 int i
, offset
, shift
, isize
;
5004 if (GET_CODE (ops
[3]) != CONST_INT
5005 || GET_CODE (ops
[2]) != CONST_INT
5006 || (GET_CODE (ops
[1]) != CONST_INT
5007 && GET_CODE (ops
[1]) != REG
))
5009 if (GET_CODE (ops
[1]) == REG
5010 && (!REG_POINTER (ops
[1])
5011 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
5014 for (i
= 0; i
< 16; i
++)
5016 isize
= INTVAL (ops
[3]);
5019 else if (isize
== 2)
5023 offset
= (INTVAL (ops
[2]) +
5024 (GET_CODE (ops
[1]) ==
5025 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
5026 for (i
= 0; i
< isize
; i
++)
5027 dst
[offset
+ i
] = i
+ shift
;
5028 return array_to_constant (TImode
, dst
);
5031 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5032 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5033 than 16 bytes, the value is repeated across the rest of the array. */
5035 constant_to_array (machine_mode mode
, rtx x
, unsigned char arr
[16])
5040 memset (arr
, 0, 16);
5041 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
5042 if (GET_CODE (x
) == CONST_INT
5043 || (GET_CODE (x
) == CONST_DOUBLE
5044 && (mode
== SFmode
|| mode
== DFmode
)))
5046 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
5048 if (GET_CODE (x
) == CONST_DOUBLE
)
5049 val
= const_double_to_hwint (x
);
5052 first
= GET_MODE_SIZE (mode
) - 1;
5053 for (i
= first
; i
>= 0; i
--)
5055 arr
[i
] = val
& 0xff;
5058 /* Splat the constant across the whole array. */
5059 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
5062 j
= (j
== first
) ? 0 : j
+ 1;
5065 else if (GET_CODE (x
) == CONST_DOUBLE
)
5067 val
= CONST_DOUBLE_LOW (x
);
5068 for (i
= 15; i
>= 8; i
--)
5070 arr
[i
] = val
& 0xff;
5073 val
= CONST_DOUBLE_HIGH (x
);
5074 for (i
= 7; i
>= 0; i
--)
5076 arr
[i
] = val
& 0xff;
5080 else if (GET_CODE (x
) == CONST_VECTOR
)
5084 mode
= GET_MODE_INNER (mode
);
5085 units
= CONST_VECTOR_NUNITS (x
);
5086 for (i
= 0; i
< units
; i
++)
5088 elt
= CONST_VECTOR_ELT (x
, i
);
5089 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
5091 if (GET_CODE (elt
) == CONST_DOUBLE
)
5092 val
= const_double_to_hwint (elt
);
5095 first
= GET_MODE_SIZE (mode
) - 1;
5096 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
5098 for (j
= first
; j
>= 0; j
--)
5100 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
5110 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5111 smaller than 16 bytes, use the bytes that would represent that value
5112 in a register, e.g., for QImode return the value of arr[3]. */
5114 array_to_constant (machine_mode mode
, const unsigned char arr
[16])
5116 machine_mode inner_mode
;
5118 int units
, size
, i
, j
, k
;
5121 if (GET_MODE_CLASS (mode
) == MODE_INT
5122 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
5124 j
= GET_MODE_SIZE (mode
);
5125 i
= j
< 4 ? 4 - j
: 0;
5126 for (val
= 0; i
< j
; i
++)
5127 val
= (val
<< 8) | arr
[i
];
5128 val
= trunc_int_for_mode (val
, mode
);
5129 return GEN_INT (val
);
5135 for (i
= high
= 0; i
< 8; i
++)
5136 high
= (high
<< 8) | arr
[i
];
5137 for (i
= 8, val
= 0; i
< 16; i
++)
5138 val
= (val
<< 8) | arr
[i
];
5139 return immed_double_const (val
, high
, TImode
);
5143 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5144 val
= trunc_int_for_mode (val
, SImode
);
5145 return hwint_to_const_double (SFmode
, val
);
5149 for (i
= 0, val
= 0; i
< 8; i
++)
5150 val
= (val
<< 8) | arr
[i
];
5151 return hwint_to_const_double (DFmode
, val
);
5154 if (!VECTOR_MODE_P (mode
))
5157 units
= GET_MODE_NUNITS (mode
);
5158 size
= GET_MODE_UNIT_SIZE (mode
);
5159 inner_mode
= GET_MODE_INNER (mode
);
5160 v
= rtvec_alloc (units
);
5162 for (k
= i
= 0; i
< units
; ++i
)
5165 for (j
= 0; j
< size
; j
++, k
++)
5166 val
= (val
<< 8) | arr
[k
];
5168 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
5169 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
5171 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
5176 return gen_rtx_CONST_VECTOR (mode
, v
);
5180 reloc_diagnostic (rtx x
)
5183 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
5186 if (GET_CODE (x
) == SYMBOL_REF
)
5187 decl
= SYMBOL_REF_DECL (x
);
5188 else if (GET_CODE (x
) == CONST
5189 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5190 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
5192 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5193 if (decl
&& !DECL_P (decl
))
5196 /* The decl could be a string constant. */
5197 if (decl
&& DECL_P (decl
))
5200 /* We use last_assemble_variable_decl to get line information. It's
5201 not always going to be right and might not even be close, but will
5202 be right for the more common cases. */
5203 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
5204 loc
= DECL_SOURCE_LOCATION (decl
);
5206 loc
= DECL_SOURCE_LOCATION (last_assemble_variable_decl
);
5208 if (TARGET_WARN_RELOC
)
5210 "creating run-time relocation for %qD", decl
);
5213 "creating run-time relocation for %qD", decl
);
5217 if (TARGET_WARN_RELOC
)
5218 warning_at (input_location
, 0, "creating run-time relocation");
5220 error_at (input_location
, "creating run-time relocation");
5224 /* Hook into assemble_integer so we can generate an error for run-time
5225 relocations. The SPU ABI disallows them. */
5227 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
5229 /* By default run-time relocations aren't supported, but we allow them
5230 in case users support it in their own run-time loader. And we provide
5231 a warning for those users that don't. */
5232 if ((GET_CODE (x
) == SYMBOL_REF
)
5233 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
5234 reloc_diagnostic (x
);
5236 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit the assembler directive that makes NAME globally visible.  */
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
5248 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
,
5249 int opno ATTRIBUTE_UNUSED
, int *total
,
5250 bool speed ATTRIBUTE_UNUSED
)
5252 machine_mode mode
= GET_MODE (x
);
5253 int cost
= COSTS_N_INSNS (2);
5255 /* Folding to a CONST_VECTOR will use extra space but there might
5256 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5257 only if it allows us to fold away multiple insns. Changing the cost
5258 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5259 because this cost will only be compared against a single insn.
5260 if (code == CONST_VECTOR)
5261 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5264 /* Use defaults for float operations. Not accurate but good enough. */
5267 *total
= COSTS_N_INSNS (13);
5272 *total
= COSTS_N_INSNS (6);
5278 if (satisfies_constraint_K (x
))
5280 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5281 *total
= COSTS_N_INSNS (1);
5283 *total
= COSTS_N_INSNS (3);
5287 *total
= COSTS_N_INSNS (3);
5292 *total
= COSTS_N_INSNS (0);
5296 *total
= COSTS_N_INSNS (5);
5300 case FLOAT_TRUNCATE
:
5302 case UNSIGNED_FLOAT
:
5305 *total
= COSTS_N_INSNS (7);
5311 *total
= COSTS_N_INSNS (9);
5318 GET_CODE (XEXP (x
, 0)) ==
5319 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5320 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5322 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5324 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5325 cost
= COSTS_N_INSNS (14);
5326 if ((val
& 0xffff) == 0)
5327 cost
= COSTS_N_INSNS (9);
5328 else if (val
> 0 && val
< 0x10000)
5329 cost
= COSTS_N_INSNS (11);
5338 *total
= COSTS_N_INSNS (20);
5345 *total
= COSTS_N_INSNS (4);
5348 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5349 *total
= COSTS_N_INSNS (0);
5351 *total
= COSTS_N_INSNS (4);
5354 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5355 if (GET_MODE_CLASS (mode
) == MODE_INT
5356 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5357 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5358 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5364 spu_unwind_word_mode (void)
5369 /* Decide whether we can make a sibling call to a function. DECL is the
5370 declaration of the function being targeted by the call and EXP is the
5371 CALL_EXPR representing the call. */
5373 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5375 return decl
&& !TARGET_LARGE_MEM
;
5378 /* We need to correctly update the back chain pointer and the Available
5379 Stack Size (which is in the second slot of the sp register.) */
5381 spu_allocate_stack (rtx op0
, rtx op1
)
5384 rtx chain
= gen_reg_rtx (V4SImode
);
5385 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5386 rtx sp
= gen_reg_rtx (V4SImode
);
5387 rtx splatted
= gen_reg_rtx (V4SImode
);
5388 rtx pat
= gen_reg_rtx (TImode
);
5390 /* copy the back chain so we can save it back again. */
5391 emit_move_insn (chain
, stack_bot
);
5393 op1
= force_reg (SImode
, op1
);
5395 v
= 0x1020300010203ll
;
5396 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5397 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5399 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5400 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
5402 if (flag_stack_check
)
5404 rtx avail
= gen_reg_rtx(SImode
);
5405 rtx result
= gen_reg_rtx(SImode
);
5406 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
5407 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5408 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5411 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
5413 emit_move_insn (stack_bot
, chain
);
5415 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
5419 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5421 static unsigned char arr
[16] =
5422 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5423 rtx temp
= gen_reg_rtx (SImode
);
5424 rtx temp2
= gen_reg_rtx (SImode
);
5425 rtx temp3
= gen_reg_rtx (V4SImode
);
5426 rtx temp4
= gen_reg_rtx (V4SImode
);
5427 rtx pat
= gen_reg_rtx (TImode
);
5428 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5430 /* Restore the backchain from the first word, sp from the second. */
5431 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5432 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5434 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5436 /* Compute Available Stack Size for sp */
5437 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5438 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5440 /* Compute Available Stack Size for back chain */
5441 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5442 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5443 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
5445 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5446 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
5450 spu_init_libfuncs (void)
5452 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
5453 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
5454 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
5455 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
5456 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
5457 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
5458 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
5459 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
5460 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
5461 set_optab_libfunc (clrsb_optab
, DImode
, "__clrsbdi2");
5462 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
5463 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
5465 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
5466 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
5468 set_optab_libfunc (addv_optab
, SImode
, "__addvsi3");
5469 set_optab_libfunc (subv_optab
, SImode
, "__subvsi3");
5470 set_optab_libfunc (smulv_optab
, SImode
, "__mulvsi3");
5471 set_optab_libfunc (sdivv_optab
, SImode
, "__divvsi3");
5472 set_optab_libfunc (negv_optab
, SImode
, "__negvsi2");
5473 set_optab_libfunc (absv_optab
, SImode
, "__absvsi2");
5474 set_optab_libfunc (addv_optab
, DImode
, "__addvdi3");
5475 set_optab_libfunc (subv_optab
, DImode
, "__subvdi3");
5476 set_optab_libfunc (smulv_optab
, DImode
, "__mulvdi3");
5477 set_optab_libfunc (sdivv_optab
, DImode
, "__divvdi3");
5478 set_optab_libfunc (negv_optab
, DImode
, "__negvdi2");
5479 set_optab_libfunc (absv_optab
, DImode
, "__absvdi2");
5481 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
5482 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
5483 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
5484 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
5485 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
5486 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
5489 /* Make a subreg, stripping any existing subreg. We could possibly just
5490 call simplify_subreg, but in this case we know what we want. */
5492 spu_gen_subreg (machine_mode mode
, rtx x
)
5494 if (GET_CODE (x
) == SUBREG
)
5496 if (GET_MODE (x
) == mode
)
5498 return gen_rtx_SUBREG (mode
, x
, 0);
5502 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5504 return (TYPE_MODE (type
) == BLKmode
5506 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5507 || int_size_in_bytes (type
) >
5508 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
5511 /* Create the built-in types and functions */
5513 enum spu_function_code
5515 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5516 #include "spu-builtins.def"
5521 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5523 struct spu_builtin_description spu_builtins
[] = {
5524 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5525 {fcode, icode, name, type, params},
5526 #include "spu-builtins.def"
5530 static GTY(()) tree spu_builtin_decls
[NUM_SPU_BUILTINS
];
5532 /* Returns the spu builtin decl for CODE. */
5535 spu_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
5537 if (code
>= NUM_SPU_BUILTINS
)
5538 return error_mark_node
;
5540 return spu_builtin_decls
[code
];
5545 spu_init_builtins (void)
5547 struct spu_builtin_description
*d
;
5550 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5551 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5552 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5553 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5554 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5555 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5557 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5558 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5559 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5560 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5562 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5564 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5565 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5566 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5567 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5568 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5569 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5570 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5571 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5572 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5573 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5574 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5575 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5577 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5578 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5579 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5580 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5581 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5582 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5583 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5584 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5586 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5587 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5589 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5591 spu_builtin_types
[SPU_BTI_PTR
] =
5592 build_pointer_type (build_qualified_type
5594 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5596 /* For each builtin we build a new prototype. The tree code will make
5597 sure nodes are shared. */
5598 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5601 char name
[64]; /* build_function will make a copy. */
5607 /* Find last parm. */
5608 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5613 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5615 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5617 sprintf (name
, "__builtin_%s", d
->name
);
5618 spu_builtin_decls
[i
] =
5619 add_builtin_function (name
, p
, i
, BUILT_IN_MD
, NULL
, NULL_TREE
);
5620 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5621 TREE_READONLY (spu_builtin_decls
[i
]) = 1;
5623 /* These builtins don't throw. */
5624 TREE_NOTHROW (spu_builtin_decls
[i
]) = 1;
5629 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5631 static unsigned char arr
[16] =
5632 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5634 rtx temp
= gen_reg_rtx (Pmode
);
5635 rtx temp2
= gen_reg_rtx (V4SImode
);
5636 rtx temp3
= gen_reg_rtx (V4SImode
);
5637 rtx pat
= gen_reg_rtx (TImode
);
5638 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5640 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5642 /* Restore the sp. */
5643 emit_move_insn (temp
, op1
);
5644 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5646 /* Compute available stack size for sp. */
5647 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5648 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5650 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5651 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
5655 spu_safe_dma (HOST_WIDE_INT channel
)
5657 return TARGET_SAFE_DMA
&& channel
>= 21 && channel
<= 27;
5661 spu_builtin_splats (rtx ops
[])
5663 machine_mode mode
= GET_MODE (ops
[0]);
5664 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5666 unsigned char arr
[16];
5667 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5668 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5672 rtx reg
= gen_reg_rtx (TImode
);
5674 if (GET_CODE (ops
[1]) != REG
5675 && GET_CODE (ops
[1]) != SUBREG
)
5676 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5682 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5688 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5693 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5698 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5704 emit_move_insn (reg
, shuf
);
5705 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
5710 spu_builtin_extract (rtx ops
[])
5715 mode
= GET_MODE (ops
[1]);
5717 if (GET_CODE (ops
[2]) == CONST_INT
)
5722 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5725 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5728 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5731 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5734 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5737 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5745 from
= spu_gen_subreg (TImode
, ops
[1]);
5746 rot
= gen_reg_rtx (TImode
);
5747 tmp
= gen_reg_rtx (SImode
);
5752 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5755 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5756 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5760 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5764 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5769 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5771 emit_insn (gen_spu_convert (ops
[0], rot
));
5775 spu_builtin_insert (rtx ops
[])
5777 machine_mode mode
= GET_MODE (ops
[0]);
5778 machine_mode imode
= GET_MODE_INNER (mode
);
5779 rtx mask
= gen_reg_rtx (TImode
);
5782 if (GET_CODE (ops
[3]) == CONST_INT
)
5783 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5786 offset
= gen_reg_rtx (SImode
);
5787 emit_insn (gen_mulsi3
5788 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5791 (mask
, stack_pointer_rtx
, offset
,
5792 GEN_INT (GET_MODE_SIZE (imode
))));
5793 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5797 spu_builtin_promote (rtx ops
[])
5799 machine_mode mode
, imode
;
5800 rtx rot
, from
, offset
;
5803 mode
= GET_MODE (ops
[0]);
5804 imode
= GET_MODE_INNER (mode
);
5806 from
= gen_reg_rtx (TImode
);
5807 rot
= spu_gen_subreg (TImode
, ops
[0]);
5809 emit_insn (gen_spu_convert (from
, ops
[1]));
5811 if (GET_CODE (ops
[2]) == CONST_INT
)
5813 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5814 if (GET_MODE_SIZE (imode
) < 4)
5815 pos
+= 4 - GET_MODE_SIZE (imode
);
5816 offset
= GEN_INT (pos
& 15);
5820 offset
= gen_reg_rtx (SImode
);
5824 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5827 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5828 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5832 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5833 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5837 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5843 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5847 spu_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
5849 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5850 rtx shuf
= gen_reg_rtx (V4SImode
);
5851 rtx insn
= gen_reg_rtx (V4SImode
);
5856 fnaddr
= force_reg (SImode
, fnaddr
);
5857 cxt
= force_reg (SImode
, cxt
);
5859 if (TARGET_LARGE_MEM
)
5861 rtx rotl
= gen_reg_rtx (V4SImode
);
5862 rtx mask
= gen_reg_rtx (V4SImode
);
5863 rtx bi
= gen_reg_rtx (SImode
);
5864 static unsigned char const shufa
[16] = {
5865 2, 3, 0, 1, 18, 19, 16, 17,
5866 0, 1, 2, 3, 16, 17, 18, 19
5868 static unsigned char const insna
[16] = {
5870 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5872 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5875 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5876 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5878 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5879 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5880 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5881 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5883 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5884 emit_move_insn (mem
, insn
);
5886 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5887 mem
= adjust_address (m_tramp
, Pmode
, 16);
5888 emit_move_insn (mem
, bi
);
5892 rtx scxt
= gen_reg_rtx (SImode
);
5893 rtx sfnaddr
= gen_reg_rtx (SImode
);
5894 static unsigned char const insna
[16] = {
5895 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5901 shufc
= gen_reg_rtx (TImode
);
5902 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5904 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5905 fits 18 bits and the last 4 are zeros. This will be true if
5906 the stack pointer is initialized to 0x3fff0 at program start,
5907 otherwise the ila instruction will be garbage. */
5909 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5910 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5912 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5913 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5914 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5916 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5917 emit_move_insn (mem
, insn
);
5919 emit_insn (gen_sync ());
5923 spu_warn_func_return (tree decl
)
5925 /* Naked functions are implemented entirely in assembly, including the
5926 return sequence, so suppress warnings about this. */
5927 return !spu_naked_function_p (decl
);
5931 spu_expand_sign_extend (rtx ops
[])
5933 unsigned char arr
[16];
5934 rtx pat
= gen_reg_rtx (TImode
);
5937 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5938 if (GET_MODE (ops
[1]) == QImode
)
5940 sign
= gen_reg_rtx (HImode
);
5941 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5942 for (i
= 0; i
< 16; i
++)
5948 for (i
= 0; i
< 16; i
++)
5950 switch (GET_MODE (ops
[1]))
5953 sign
= gen_reg_rtx (SImode
);
5954 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5956 arr
[last
- 1] = 0x02;
5959 sign
= gen_reg_rtx (SImode
);
5960 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5961 for (i
= 0; i
< 4; i
++)
5962 arr
[last
- i
] = 3 - i
;
5965 sign
= gen_reg_rtx (SImode
);
5966 c
= gen_reg_rtx (SImode
);
5967 emit_insn (gen_spu_convert (c
, ops
[1]));
5968 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5969 for (i
= 0; i
< 8; i
++)
5970 arr
[last
- i
] = 7 - i
;
5976 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5977 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
5980 /* expand vector initialization. If there are any constant parts,
5981 load constant parts first. Then load any non-constant parts. */
5983 spu_expand_vector_init (rtx target
, rtx vals
)
5985 machine_mode mode
= GET_MODE (target
);
5986 int n_elts
= GET_MODE_NUNITS (mode
);
5988 bool all_same
= true;
5989 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5992 first
= XVECEXP (vals
, 0, 0);
5993 for (i
= 0; i
< n_elts
; ++i
)
5995 x
= XVECEXP (vals
, 0, i
);
5996 if (!(CONST_INT_P (x
)
5997 || GET_CODE (x
) == CONST_DOUBLE
5998 || GET_CODE (x
) == CONST_FIXED
))
6002 if (first_constant
== NULL_RTX
)
6005 if (i
> 0 && !rtx_equal_p (x
, first
))
6009 /* if all elements are the same, use splats to repeat elements */
6012 if (!CONSTANT_P (first
)
6013 && !register_operand (first
, GET_MODE (x
)))
6014 first
= force_reg (GET_MODE (first
), first
);
6015 emit_insn (gen_spu_splats (target
, first
));
6019 /* load constant parts */
6020 if (n_var
!= n_elts
)
6024 emit_move_insn (target
,
6025 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
6029 rtx constant_parts_rtx
= copy_rtx (vals
);
6031 gcc_assert (first_constant
!= NULL_RTX
);
6032 /* fill empty slots with the first constant, this increases
6033 our chance of using splats in the recursive call below. */
6034 for (i
= 0; i
< n_elts
; ++i
)
6036 x
= XVECEXP (constant_parts_rtx
, 0, i
);
6037 if (!(CONST_INT_P (x
)
6038 || GET_CODE (x
) == CONST_DOUBLE
6039 || GET_CODE (x
) == CONST_FIXED
))
6040 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
6043 spu_expand_vector_init (target
, constant_parts_rtx
);
6047 /* load variable parts */
6050 rtx insert_operands
[4];
6052 insert_operands
[0] = target
;
6053 insert_operands
[2] = target
;
6054 for (i
= 0; i
< n_elts
; ++i
)
6056 x
= XVECEXP (vals
, 0, i
);
6057 if (!(CONST_INT_P (x
)
6058 || GET_CODE (x
) == CONST_DOUBLE
6059 || GET_CODE (x
) == CONST_FIXED
))
6061 if (!register_operand (x
, GET_MODE (x
)))
6062 x
= force_reg (GET_MODE (x
), x
);
6063 insert_operands
[1] = x
;
6064 insert_operands
[3] = GEN_INT (i
);
6065 spu_builtin_insert (insert_operands
);
6071 /* Return insn index for the vector compare instruction for given CODE,
6072 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6075 get_vec_cmp_insn (enum rtx_code code
,
6076 machine_mode dest_mode
,
6077 machine_mode op_mode
)
6083 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6084 return CODE_FOR_ceq_v16qi
;
6085 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6086 return CODE_FOR_ceq_v8hi
;
6087 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6088 return CODE_FOR_ceq_v4si
;
6089 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6090 return CODE_FOR_ceq_v4sf
;
6091 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6092 return CODE_FOR_ceq_v2df
;
6095 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6096 return CODE_FOR_cgt_v16qi
;
6097 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6098 return CODE_FOR_cgt_v8hi
;
6099 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6100 return CODE_FOR_cgt_v4si
;
6101 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6102 return CODE_FOR_cgt_v4sf
;
6103 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6104 return CODE_FOR_cgt_v2df
;
6107 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6108 return CODE_FOR_clgt_v16qi
;
6109 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6110 return CODE_FOR_clgt_v8hi
;
6111 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6112 return CODE_FOR_clgt_v4si
;
6120 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6121 DMODE is expected destination mode. This is a recursive function. */
6124 spu_emit_vector_compare (enum rtx_code rcode
,
6130 machine_mode dest_mode
;
6131 machine_mode op_mode
= GET_MODE (op1
);
6133 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
6135 /* Floating point vector compare instructions uses destination V4SImode.
6136 Double floating point vector compare instructions uses destination V2DImode.
6137 Move destination to appropriate mode later. */
6138 if (dmode
== V4SFmode
)
6139 dest_mode
= V4SImode
;
6140 else if (dmode
== V2DFmode
)
6141 dest_mode
= V2DImode
;
6145 mask
= gen_reg_rtx (dest_mode
);
6146 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6148 if (vec_cmp_insn
== -1)
6150 bool swap_operands
= false;
6151 bool try_again
= false;
6156 swap_operands
= true;
6161 swap_operands
= true;
6171 /* Treat A != B as ~(A==B). */
6173 enum rtx_code rev_code
;
6174 enum insn_code nor_code
;
6177 rev_code
= reverse_condition_maybe_unordered (rcode
);
6178 rev_mask
= spu_emit_vector_compare (rev_code
, op0
, op1
, dest_mode
);
6180 nor_code
= optab_handler (one_cmpl_optab
, dest_mode
);
6181 gcc_assert (nor_code
!= CODE_FOR_nothing
);
6182 emit_insn (GEN_FCN (nor_code
) (mask
, rev_mask
));
6183 if (dmode
!= dest_mode
)
6185 rtx temp
= gen_reg_rtx (dest_mode
);
6186 convert_move (temp
, mask
, 0);
6196 /* Try GT/GTU/LT/LTU OR EQ */
6199 enum insn_code ior_code
;
6200 enum rtx_code new_code
;
6204 case GE
: new_code
= GT
; break;
6205 case GEU
: new_code
= GTU
; break;
6206 case LE
: new_code
= LT
; break;
6207 case LEU
: new_code
= LTU
; break;
6212 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
6213 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6215 ior_code
= optab_handler (ior_optab
, dest_mode
);
6216 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6217 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
6218 if (dmode
!= dest_mode
)
6220 rtx temp
= gen_reg_rtx (dest_mode
);
6221 convert_move (temp
, mask
, 0);
6231 enum insn_code ior_code
;
6233 lt_rtx
= spu_emit_vector_compare (LT
, op0
, op1
, dest_mode
);
6234 gt_rtx
= spu_emit_vector_compare (GT
, op0
, op1
, dest_mode
);
6236 ior_code
= optab_handler (ior_optab
, dest_mode
);
6237 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6238 emit_insn (GEN_FCN (ior_code
) (mask
, lt_rtx
, gt_rtx
));
6239 if (dmode
!= dest_mode
)
6241 rtx temp
= gen_reg_rtx (dest_mode
);
6242 convert_move (temp
, mask
, 0);
6249 /* Implement as (A==A) & (B==B) */
6252 enum insn_code and_code
;
6254 a_rtx
= spu_emit_vector_compare (EQ
, op0
, op0
, dest_mode
);
6255 b_rtx
= spu_emit_vector_compare (EQ
, op1
, op1
, dest_mode
);
6257 and_code
= optab_handler (and_optab
, dest_mode
);
6258 gcc_assert (and_code
!= CODE_FOR_nothing
);
6259 emit_insn (GEN_FCN (and_code
) (mask
, a_rtx
, b_rtx
));
6260 if (dmode
!= dest_mode
)
6262 rtx temp
= gen_reg_rtx (dest_mode
);
6263 convert_move (temp
, mask
, 0);
6273 /* You only get two chances. */
6275 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6277 gcc_assert (vec_cmp_insn
!= -1);
6288 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
6289 if (dmode
!= dest_mode
)
6291 rtx temp
= gen_reg_rtx (dest_mode
);
6292 convert_move (temp
, mask
, 0);
6299 /* Emit vector conditional expression.
6300 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6301 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6304 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
6305 rtx cond
, rtx cc_op0
, rtx cc_op1
)
6307 machine_mode dest_mode
= GET_MODE (dest
);
6308 enum rtx_code rcode
= GET_CODE (cond
);
6311 /* Get the vector mask for the given relational operations. */
6312 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
6314 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
6320 spu_force_reg (machine_mode mode
, rtx op
)
6323 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
6325 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
6326 || GET_MODE (op
) == BLKmode
)
6327 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
6331 r
= force_reg (GET_MODE (op
), op
);
6332 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6334 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6339 x
= gen_reg_rtx (mode
);
6340 emit_insn (gen_spu_convert (x
, r
));
6345 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6347 HOST_WIDE_INT v
= 0;
6349 /* Check the range of immediate operands. */
6350 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6352 int range
= p
- SPU_BTI_7
;
6354 if (!CONSTANT_P (op
))
6355 error ("%s expects an integer literal in the range [%d, %d]",
6357 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6359 if (GET_CODE (op
) == CONST
6360 && (GET_CODE (XEXP (op
, 0)) == PLUS
6361 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6363 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6364 op
= XEXP (XEXP (op
, 0), 0);
6366 else if (GET_CODE (op
) == CONST_INT
)
6368 else if (GET_CODE (op
) == CONST_VECTOR
6369 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6370 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6372 /* The default for v is 0 which is valid in every range. */
6373 if (v
< spu_builtin_range
[range
].low
6374 || v
> spu_builtin_range
[range
].high
)
6375 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6377 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6386 /* This is only used in lqa, and stqa. Even though the insns
6387 encode 16 bits of the address (all but the 2 least
6388 significant), only 14 bits are used because it is masked to
6389 be 16 byte aligned. */
6393 /* This is used for lqr and stqr. */
6400 if (GET_CODE (op
) == LABEL_REF
6401 || (GET_CODE (op
) == SYMBOL_REF
6402 && SYMBOL_REF_FUNCTION_P (op
))
6403 || (v
& ((1 << lsbits
) - 1)) != 0)
6404 warning (0, "%d least significant bits of %s are ignored", lsbits
,
6411 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6412 rtx target
, rtx ops
[])
6414 enum insn_code icode
= (enum insn_code
) d
->icode
;
6417 /* Expand the arguments into rtl. */
6419 if (d
->parm
[0] != SPU_BTI_VOID
)
6422 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6424 tree arg
= CALL_EXPR_ARG (exp
, a
);
6427 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6430 gcc_assert (i
== insn_data
[icode
].n_generator_args
);
6435 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6436 tree exp
, rtx target
)
6440 enum insn_code icode
= (enum insn_code
) d
->icode
;
6441 machine_mode mode
, tmode
;
6446 /* Set up ops[] with values from arglist. */
6447 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6449 /* Handle the target operand which must be operand 0. */
6451 if (d
->parm
[0] != SPU_BTI_VOID
)
6454 /* We prefer the mode specified for the match_operand otherwise
6455 use the mode from the builtin function prototype. */
6456 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6457 if (tmode
== VOIDmode
)
6458 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6460 /* Try to use target because not using it can lead to extra copies
6461 and when we are using all of the registers extra copies leads
6463 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6466 target
= ops
[0] = gen_reg_rtx (tmode
);
6468 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6474 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6476 machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6481 arg
= CALL_EXPR_ARG (exp
, 0);
6482 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
6483 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6484 addr
= memory_address (mode
, op
);
6487 op
= gen_reg_rtx (GET_MODE (addr
));
6488 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6489 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6490 op
= gen_rtx_MEM (mode
, op
);
6492 pat
= GEN_FCN (icode
) (target
, op
);
6499 /* Ignore align_hint, but still expand it's args in case they have
6501 if (icode
== CODE_FOR_spu_align_hint
)
6504 /* Handle the rest of the operands. */
6505 for (p
= 1; i
< n_operands
; i
++, p
++)
6507 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6508 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6510 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6512 /* mode can be VOIDmode here for labels */
6514 /* For specific intrinsics with an immediate operand, e.g.,
6515 si_ai(), we sometimes need to convert the scalar argument to a
6516 vector argument by splatting the scalar. */
6517 if (VECTOR_MODE_P (mode
)
6518 && (GET_CODE (ops
[i
]) == CONST_INT
6519 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6520 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6522 if (GET_CODE (ops
[i
]) == CONST_INT
)
6523 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6526 rtx reg
= gen_reg_rtx (mode
);
6527 machine_mode imode
= GET_MODE_INNER (mode
);
6528 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6529 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6530 if (imode
!= GET_MODE (ops
[i
]))
6531 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6532 TYPE_UNSIGNED (spu_builtin_types
6534 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6539 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6541 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6542 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6548 pat
= GEN_FCN (icode
) (0);
6551 pat
= GEN_FCN (icode
) (ops
[0]);
6554 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6557 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6560 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6563 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6566 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6575 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6576 emit_call_insn (pat
);
6577 else if (d
->type
== B_JUMP
)
6579 emit_jump_insn (pat
);
6585 return_type
= spu_builtin_types
[d
->parm
[0]];
6586 if (d
->parm
[0] != SPU_BTI_VOID
6587 && GET_MODE (target
) != TYPE_MODE (return_type
))
6589 /* target is the return value. It should always be the mode of
6590 the builtin function prototype. */
6591 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6598 spu_expand_builtin (tree exp
,
6600 rtx subtarget ATTRIBUTE_UNUSED
,
6601 machine_mode mode ATTRIBUTE_UNUSED
,
6602 int ignore ATTRIBUTE_UNUSED
)
6604 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6605 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
6606 struct spu_builtin_description
*d
;
6608 if (fcode
< NUM_SPU_BUILTINS
)
6610 d
= &spu_builtins
[fcode
];
6612 return spu_expand_builtin_1 (d
, exp
, target
);
6617 /* Implement targetm.vectorize.builtin_mask_for_load. */
6619 spu_builtin_mask_for_load (void)
6621 return spu_builtin_decls
[SPU_MASK_FOR_LOAD
];
6624 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6626 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6628 int misalign ATTRIBUTE_UNUSED
)
6632 switch (type_of_cost
)
6640 case cond_branch_not_taken
:
6642 case vec_promote_demote
:
6649 /* Load + rotate. */
6652 case unaligned_load
:
6655 case cond_branch_taken
:
6659 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6660 return elements
/ 2 + 1;
6667 /* Implement targetm.vectorize.init_cost. */
6670 spu_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
6672 unsigned *cost
= XNEWVEC (unsigned, 3);
6673 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
6677 /* Implement targetm.vectorize.add_stmt_cost. */
/* Record the cost of COUNT copies of a statement of kind KIND into the
   accumulator DATA (the array from spu_init_cost), indexed by WHERE.
   Returns the amount added (return statement elided in this excerpt).  */
6680 spu_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6681 struct _stmt_vec_info
*stmt_info
, int misalign
,
6682 enum vect_cost_model_location where
)
6684 unsigned *cost
= (unsigned *) data
;
6685 unsigned retval
= 0;
/* Only accumulate when the vectorizer cost model is enabled.  */
6687 if (flag_vect_cost_model
)
6689 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6690 int stmt_cost
= spu_builtin_vectorization_cost (kind
, vectype
, misalign
);
6692 /* Statements in an inner loop relative to the loop being
6693 vectorized are weighted more heavily. The value here is
6694 arbitrary and could potentially be improved with analysis. */
6695 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6696 count
*= 50; /* FIXME. */
6698 retval
= (unsigned) (count
* stmt_cost
);
6699 cost
[where
] += retval
;
6705 /* Implement targetm.vectorize.finish_cost. */
/* Copy the three accumulated counters out of DATA into the caller's
   prologue/body/epilogue output parameters.  */
6708 spu_finish_cost (void *data
, unsigned *prologue_cost
,
6709 unsigned *body_cost
, unsigned *epilogue_cost
)
6711 unsigned *cost
= (unsigned *) data
;
6712 *prologue_cost
= cost
[vect_prologue
];
6713 *body_cost
= cost
[vect_body
];
6714 *epilogue_cost
= cost
[vect_epilogue
];
6717 /* Implement targetm.vectorize.destroy_cost_data. */
/* Release the accumulator allocated by spu_init_cost.  NOTE(review):
   the body is elided from this excerpt; presumably it frees DATA.  */
6720 spu_destroy_cost_data (void *data
)
6725 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6726 after applying N number of iterations. This routine does not determine
6727 how may iterations are required to reach desired alignment. */
/* NOTE(review): the body is almost entirely elided here; only the
   trailing "naturally aligned" comment is visible.  Presumably packed
   types are special-cased -- confirm against the full source.  */
6730 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6735 /* All other types are naturally aligned. */
6739 /* Return the appropriate mode for a named address pointer. */
/* Implement TARGET_ADDR_SPACE_POINTER_MODE.  Switches on ADDRSPACE;
   the per-case return values are elided from this excerpt.  */
6741 spu_addr_space_pointer_mode (addr_space_t addrspace
)
6745 case ADDR_SPACE_GENERIC
:
6754 /* Return the appropriate mode for a named address address. */
/* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.  Switches on ADDRSPACE;
   the per-case return values are elided from this excerpt.  */
6756 spu_addr_space_address_mode (addr_space_t addrspace
)
6760 case ADDR_SPACE_GENERIC
:
6769 /* Determine if one named address space is a subset of another. */
/* Implement TARGET_ADDR_SPACE_SUBSET_P.  SPU has only the generic
   (local-store) space and __ea; an address space is trivially a subset
   of itself, and generic is a subset of __ea only when address-space
   conversion is enabled.  */
6772 spu_addr_space_subset_p (addr_space_t subset
, addr_space_t superset
)
/* Only ADDR_SPACE_GENERIC and ADDR_SPACE_EA exist on SPU.  */
6774 gcc_assert (subset
== ADDR_SPACE_GENERIC
|| subset
== ADDR_SPACE_EA
);
6775 gcc_assert (superset
== ADDR_SPACE_GENERIC
|| superset
== ADDR_SPACE_EA
);
6777 if (subset
== superset
)
6780 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6781 being subsets but instead as disjoint address spaces. */
6782 else if (!TARGET_ADDRESS_SPACE_CONVERSION
)
6786 return (subset
== ADDR_SPACE_GENERIC
&& superset
== ADDR_SPACE_EA
);
6789 /* Convert from one address space to another. */
/* Implement TARGET_ADDR_SPACE_CONVERT.  Converts OP between the
   generic (local-store) space and __ea by offsetting against the
   runtime value of __ea_local_store, with NULL preserved via a
   conditional move.  NOTE(review): the declarations of `ls' and
   `result' and the final return are elided from this excerpt.  */
6791 spu_addr_space_convert (rtx op
, tree from_type
, tree to_type
)
6793 addr_space_t from_as
= TYPE_ADDR_SPACE (TREE_TYPE (from_type
));
6794 addr_space_t to_as
= TYPE_ADDR_SPACE (TREE_TYPE (to_type
));
6796 gcc_assert (from_as
== ADDR_SPACE_GENERIC
|| from_as
== ADDR_SPACE_EA
);
6797 gcc_assert (to_as
== ADDR_SPACE_GENERIC
|| to_as
== ADDR_SPACE_EA
);
/* __ea -> generic: subtract the local-store base from the EA value.  */
6799 if (to_as
== ADDR_SPACE_GENERIC
&& from_as
== ADDR_SPACE_EA
)
/* Load the 64-bit __ea_local_store base address.  */
6803 ls
= gen_const_mem (DImode
,
6804 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6805 set_mem_align (ls
, 128);
6807 result
= gen_reg_rtx (Pmode
);
6808 ls
= force_reg (Pmode
, convert_modes (Pmode
, DImode
, ls
, 1));
6809 op
= force_reg (Pmode
, convert_modes (Pmode
, EAmode
, op
, 1));
/* Keep a NULL pointer NULL: use the base only when OP is nonzero.  */
6810 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6811 ls
, const0_rtx
, Pmode
, 1);
6813 emit_insn (gen_subsi3 (result
, op
, ls
));
/* generic -> __ea: add the local-store base to the SPU address.  */
6818 else if (to_as
== ADDR_SPACE_EA
&& from_as
== ADDR_SPACE_GENERIC
)
6822 ls
= gen_const_mem (DImode
,
6823 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6824 set_mem_align (ls
, 128);
6826 result
= gen_reg_rtx (EAmode
);
6827 ls
= force_reg (EAmode
, convert_modes (EAmode
, DImode
, ls
, 1));
6828 op
= force_reg (Pmode
, op
);
/* Again preserve NULL across the conversion.  */
6829 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6830 ls
, const0_rtx
, EAmode
, 1);
6831 op
= force_reg (EAmode
, convert_modes (EAmode
, Pmode
, op
, 1));
/* EAmode is either SImode or DImode; pick the matching add pattern.  */
6833 if (EAmode
== SImode
)
6834 emit_insn (gen_addsi3 (result
, op
, ls
));
6836 emit_insn (gen_adddi3 (result
, op
, ls
));
6846 /* Count the total number of instructions in each pipe and return the
6847 maximum, which is used as the Minimum Iteration Interval (MII)
6848 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6849 -2 are instructions that can go in pipe0 or pipe1. */
/* Implement TARGET_SCHED_SMS_RES_MII for the data-dependence graph G.
   t[] counts instructions bucketed by get_pipe()+2, so indices 0..3
   correspond to pipe values -2..1.  NOTE(review): the lines that
   increment t[p] and part of the dump output are elided here.  */
6851 spu_sms_res_mii (struct ddg
*g
)
6854 unsigned t
[4] = {0, 0, 0, 0};
6856 for (i
= 0; i
< g
->num_nodes
; i
++)
6858 rtx_insn
*insn
= g
->nodes
[i
].insn
;
/* Shift get_pipe's -2..1 range into a valid 0..3 array index.  */
6859 int p
= get_pipe (insn
) + 2;
6861 gcc_assert (p
>= 0);
/* Optional scheduler dump of each real insn's pipe assignment.  */
6865 if (dump_file
&& INSN_P (insn
))
6866 fprintf (dump_file
, "i%d %s %d %d\n",
6868 insn_data
[INSN_CODE(insn
)].name
,
6872 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
/* MII is the busiest resource: either-pipe insns (t[0]) split across
   both pipes with t[2]+t[3], bounded below by each dedicated pipe.  */
6874 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
/* Per-function expander initialization: reset the hard frame pointer's
   assumed alignment and pre-create two 128-bit-aligned pseudo registers
   expected at LAST_VIRTUAL_REGISTER+1/+2.  */
6879 spu_init_expanders (void)
6884 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6885 frame_pointer_needed is true. We don't know that until we're
6886 expanding the prologue. */
6887 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6889 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6890 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6891 to be treated as aligned, so generate them here. */
6892 r0
= gen_reg_rtx (SImode
);
6893 r1
= gen_reg_rtx (SImode
);
6894 mark_reg_pointer (r0
, 128);
6895 mark_reg_pointer (r1
, 128);
/* These must be the first pseudos allocated in this function.  */
6896 gcc_assert (REGNO (r0
) == LAST_VIRTUAL_REGISTER
+ 1
6897 && REGNO (r1
) == LAST_VIRTUAL_REGISTER
+ 2);
/* Implement TARGET_LIBGCC_CMP_RETURN_MODE.  NOTE(review): the return
   statement is elided here; per the comment it returns SImode.  */
6902 spu_libgcc_cmp_return_mode (void)
6905 /* For SPU word mode is TI mode so it is better to use SImode
6906 for compare returns. */
/* Implement TARGET_LIBGCC_SHIFT_COUNT_MODE.  NOTE(review): the return
   statement is elided here; per the comment it returns SImode.  */
6911 spu_libgcc_shift_count_mode (void)
6913 /* For SPU word mode is TI mode so it is better to use SImode
6914 for shift counts. */
6918 /* Implement targetm.section_type_flags. */
/* Return section flags for NAME: ".toe" and "._ea" get SPU-specific
   treatment, everything else falls through to the default hook.
   NOTE(review): the return value for the ".toe" branch is elided.  */
6920 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6922 /* .toe needs to have type @nobits. */
6923 if (strcmp (name
, ".toe") == 0)
6925 /* Don't load _ea into the current address space. */
6926 if (strcmp (name
, "._ea") == 0)
6927 return SECTION_WRITE
| SECTION_DEBUG
;
6928 return default_section_type_flags (decl
, name
, reloc
);
6931 /* Implement targetm.select_section. */
/* Place __ea-qualified objects into the dedicated "._ea" section;
   non-DECL string constants use get_section with explicit flags, other
   objects use get_named_section; everything else defers to the ELF
   default.  */
6933 spu_select_section (tree decl
, int reloc
, unsigned HOST_WIDE_INT align
)
6935 /* Variables and constants defined in the __ea address space
6936 go into a special section named "._ea". */
6937 if (TREE_TYPE (decl
) != error_mark_node
6938 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) == ADDR_SPACE_EA
)
6940 /* We might get called with string constants, but get_named_section
6941 doesn't like them as they are not DECLs. Also, we need to set
6942 flags in that case. */
6944 return get_section ("._ea", SECTION_WRITE
| SECTION_DEBUG
, NULL
);
6946 return get_named_section (decl
, "._ea", reloc
);
6949 return default_elf_select_section (decl
, reloc
, align
);
6952 /* Implement targetm.unique_section. */
/* Skip unique-section naming for objects in a non-generic (__ea)
   address space; otherwise use the default.  NOTE(review): the early
   return for the __ea case is elided from this excerpt.  */
6954 spu_unique_section (tree decl
, int reloc
)
6956 /* We don't support unique section names in the __ea address
6958 if (TREE_TYPE (decl
) != error_mark_node
6959 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) != 0)
6962 default_unique_section (decl
, reloc
);
6965 /* Generate a constant or register which contains 2^SCALE. We assume
6966 the result is valid for MODE. Currently, MODE must be V4SFmode and
6967 SCALE must be SImode. */
/* Builds the IEEE single-precision encoding of 2^scale -- biased
   exponent (127 + scale) placed in bits 30..23 -- splatted across all
   four V4SF lanes; at runtime via add/shift/splat for a register SCALE,
   or as a constant vector for a CONST_INT SCALE.  NOTE(review): the
   `return mul;' for the register path appears to be elided.  */
6969 spu_gen_exp2 (machine_mode mode
, rtx scale
)
6971 gcc_assert (mode
== V4SFmode
);
6972 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6973 if (GET_CODE (scale
) != CONST_INT
)
6975 /* unsigned int exp = (127 + scale) << 23;
6976 __vector float m = (__vector float) spu_splats (exp); */
6977 rtx reg
= force_reg (SImode
, scale
);
6978 rtx exp
= gen_reg_rtx (SImode
);
6979 rtx mul
= gen_reg_rtx (mode
);
6980 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6981 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
6982 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
/* Constant path: lay out (127+scale) << 23 byte-by-byte, big-endian,
   repeated in each of the four 4-byte lanes.  */
6987 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6988 unsigned char arr
[16];
/* Byte 0 of each word: top bit of the 8-bit exponent field.  */
6989 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
/* Byte 1: low exponent bit in bit 7; mantissa bits are zero.  */
6990 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6991 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6992 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
6993 return array_to_constant (mode
, arr
);
6997 /* After reload, just change the convert into a move instruction
6998 or a dead instruction. */
/* ops[0] is the destination, ops[1] the source.  If both resolved to
   the same hard register the convert is a no-op (emit a deleted note);
   otherwise emit a TImode register-to-register move.  */
7000 spu_split_convert (rtx ops
[])
7002 if (REGNO (ops
[0]) == REGNO (ops
[1]))
7003 emit_note (NOTE_INSN_DELETED
);
7006 /* Use TImode always as this might help hard reg copyprop. */
7007 rtx op0
= gen_rtx_REG (TImode
, REGNO (ops
[0]));
7008 rtx op1
= gen_rtx_REG (TImode
, REGNO (ops
[1]));
7009 emit_insn (gen_move_insn (op0
, op1
));
/* Implement FUNCTION_PROFILER: emit the mcount call sequence into the
   assembly output -- a branch-and-set-link through register $75 to
   _mcount.  */
7014 spu_function_profiler (FILE * file
, int labelno ATTRIBUTE_UNUSED
)
7016 fprintf (file
, "# profile\n");
7017 fprintf (file
, "brsl $75, _mcount\n");
7020 /* Implement targetm.ref_may_alias_errno. */
/* Recognize SPU newlib's errno, which lives as the first member of the
   external struct _impure_data rather than as a plain variable the
   default hook would spot.  NOTE(review): the `return true;' for the
   matched case appears to be elided from this excerpt.  */
7022 spu_ref_may_alias_errno (ao_ref
*ref
)
7024 tree base
= ao_ref_base (ref
);
7026 /* With SPU newlib, errno is defined as something like
7028 The default implementation of this target macro does not
7029 recognize such expressions, so special-code for it here. */
/* Match: an external, non-static VAR_DECL of RECORD_TYPE whose
   assembler name is "_impure_data", referenced at offset 0.  */
7031 if (TREE_CODE (base
) == VAR_DECL
7032 && !TREE_STATIC (base
)
7033 && DECL_EXTERNAL (base
)
7034 && TREE_CODE (TREE_TYPE (base
)) == RECORD_TYPE
7035 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base
)),
7036 "_impure_data") == 0
7037 /* _errno is the first member of _impure_data. */
7038 && ref
->offset
== 0)
7041 return default_ref_may_alias_errno (ref
);
7044 /* Output thunk to FILE that implements a C++ virtual function call (with
7045 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7046 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7047 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7048 relative to the resulting this pointer. */
/* NOTE(review): the `tree function' parameter line and the declaration
   of the op[] operand array are elided from this excerpt.  */
7051 spu_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
7052 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
7057 /* Make sure unwind info is emitted for the thunk if needed. */
7058 final_start_function (emit_barrier (), file
, 1);
7060 /* Operand 0 is the target function. */
7061 op
[0] = XEXP (DECL_RTL (function
), 0);
7063 /* Operand 1 is the 'this' pointer. */
/* When the result is returned in memory, a hidden return-slot pointer
   occupies the first argument register, pushing `this' to the next.  */
7064 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
7065 op
[1] = gen_rtx_REG (Pmode
, FIRST_ARG_REGNUM
+ 1);
7067 op
[1] = gen_rtx_REG (Pmode
, FIRST_ARG_REGNUM
);
7069 /* Operands 2/3 are the low/high halfwords of delta. */
7070 op
[2] = GEN_INT (trunc_int_for_mode (delta
, HImode
));
7071 op
[3] = GEN_INT (trunc_int_for_mode (delta
>> 16, HImode
));
7073 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7074 op
[4] = GEN_INT (trunc_int_for_mode (vcall_offset
, HImode
));
7075 op
[5] = GEN_INT (trunc_int_for_mode (vcall_offset
>> 16, HImode
));
7077 /* Operands 6/7 are temporary registers. */
7078 op
[6] = gen_rtx_REG (Pmode
, 79);
7079 op
[7] = gen_rtx_REG (Pmode
, 78);
7081 /* Add DELTA to this pointer. */
/* Pick the shortest immediate sequence: 10-bit `ai', 16-bit il+a, or
   full 32-bit ilhu/iohl+a.  */
7084 if (delta
>= -0x200 && delta
< 0x200)
7085 output_asm_insn ("ai\t%1,%1,%2", op
);
7086 else if (delta
>= -0x8000 && delta
< 0x8000)
7088 output_asm_insn ("il\t%6,%2", op
);
7089 output_asm_insn ("a\t%1,%1,%6", op
);
7093 output_asm_insn ("ilhu\t%6,%3", op
);
7094 output_asm_insn ("iohl\t%6,%2", op
);
7095 output_asm_insn ("a\t%1,%1,%6", op
);
7099 /* Perform vcall adjustment. */
/* Load the vtable pointer stored at offset 0 of `this'.  */
7102 output_asm_insn ("lqd\t%7,0(%1)", op
);
7103 output_asm_insn ("rotqby\t%7,%7,%1", op
);
/* Add VCALL_OFFSET using the same three size tiers as DELTA.  */
7105 if (vcall_offset
>= -0x200 && vcall_offset
< 0x200)
7106 output_asm_insn ("ai\t%7,%7,%4", op
);
7107 else if (vcall_offset
>= -0x8000 && vcall_offset
< 0x8000)
7109 output_asm_insn ("il\t%6,%4", op
);
7110 output_asm_insn ("a\t%7,%7,%6", op
);
7114 output_asm_insn ("ilhu\t%6,%5", op
);
7115 output_asm_insn ("iohl\t%6,%4", op
);
7116 output_asm_insn ("a\t%7,%7,%6", op
);
/* Fetch the adjustment from the vtable and apply it to `this'.  */
7119 output_asm_insn ("lqd\t%6,0(%7)", op
);
7120 output_asm_insn ("rotqby\t%6,%6,%7", op
);
7121 output_asm_insn ("a\t%1,%1,%6", op
);
7124 /* Jump to target. */
7125 output_asm_insn ("br\t%0", op
);
7127 final_end_function ();
7130 /* Canonicalize a comparison from one we don't have to one we do have. */
/* Implement TARGET_CANONICALIZE_COMPARISON: when the first operand need
   not be preserved, turn LE/LT/LEU/LTU into the swapped GE/GT/GEU/GTU
   form the SPU has patterns for.  NOTE(review): the operand-swap lines
   between the condition and the swap_condition call are elided here.  */
7132 spu_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
7133 bool op0_preserve_value
)
7135 if (!op0_preserve_value
7136 && (*code
== LE
|| *code
== LT
|| *code
== LEU
|| *code
== LTU
))
7141 *code
= (int)swap_condition ((enum rtx_code
)*code
);
7145 /* Table of machine attributes. */
/* SPU recognizes two machine attributes: "naked" on function decls and
   "spu_vector" on types, each validated by its handler.  The table is
   NULL-terminated.  NOTE(review): the trailing fields of the first two
   entries are elided from this excerpt.  */
7146 static const struct attribute_spec spu_attribute_table
[] =
7148 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7149 affects_type_identity } */
7150 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
,
7152 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
,
7154 { NULL
, 0, 0, false, false, false, NULL
, false }
7157 /* TARGET overrides. */
7159 #undef TARGET_ADDR_SPACE_POINTER_MODE
7160 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
7162 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
7163 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
7165 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
7166 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
7167 spu_addr_space_legitimate_address_p
7169 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
7170 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
7172 #undef TARGET_ADDR_SPACE_SUBSET_P
7173 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
7175 #undef TARGET_ADDR_SPACE_CONVERT
7176 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
7178 #undef TARGET_INIT_BUILTINS
7179 #define TARGET_INIT_BUILTINS spu_init_builtins
7180 #undef TARGET_BUILTIN_DECL
7181 #define TARGET_BUILTIN_DECL spu_builtin_decl
7183 #undef TARGET_EXPAND_BUILTIN
7184 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
7186 #undef TARGET_UNWIND_WORD_MODE
7187 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
7189 #undef TARGET_LEGITIMIZE_ADDRESS
7190 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
7192 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
7193 and .quad for the debugger. When it is known that the assembler is fixed,
7194 these can be removed. */
7195 #undef TARGET_ASM_UNALIGNED_SI_OP
7196 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
7198 #undef TARGET_ASM_ALIGNED_DI_OP
7199 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
7201 /* The .8byte directive doesn't seem to work well for a 32 bit
7203 #undef TARGET_ASM_UNALIGNED_DI_OP
7204 #define TARGET_ASM_UNALIGNED_DI_OP NULL
7206 #undef TARGET_RTX_COSTS
7207 #define TARGET_RTX_COSTS spu_rtx_costs
7209 #undef TARGET_ADDRESS_COST
7210 #define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
7212 #undef TARGET_SCHED_ISSUE_RATE
7213 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
7215 #undef TARGET_SCHED_INIT_GLOBAL
7216 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
7218 #undef TARGET_SCHED_INIT
7219 #define TARGET_SCHED_INIT spu_sched_init
7221 #undef TARGET_SCHED_VARIABLE_ISSUE
7222 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
7224 #undef TARGET_SCHED_REORDER
7225 #define TARGET_SCHED_REORDER spu_sched_reorder
7227 #undef TARGET_SCHED_REORDER2
7228 #define TARGET_SCHED_REORDER2 spu_sched_reorder
7230 #undef TARGET_SCHED_ADJUST_COST
7231 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
7233 #undef TARGET_ATTRIBUTE_TABLE
7234 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
7236 #undef TARGET_ASM_INTEGER
7237 #define TARGET_ASM_INTEGER spu_assemble_integer
7239 #undef TARGET_SCALAR_MODE_SUPPORTED_P
7240 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
7242 #undef TARGET_VECTOR_MODE_SUPPORTED_P
7243 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
7245 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
7246 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
7248 #undef TARGET_ASM_GLOBALIZE_LABEL
7249 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
7251 #undef TARGET_PASS_BY_REFERENCE
7252 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
7254 #undef TARGET_FUNCTION_ARG
7255 #define TARGET_FUNCTION_ARG spu_function_arg
7257 #undef TARGET_FUNCTION_ARG_ADVANCE
7258 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
7260 #undef TARGET_MUST_PASS_IN_STACK
7261 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
7263 #undef TARGET_BUILD_BUILTIN_VA_LIST
7264 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
7266 #undef TARGET_EXPAND_BUILTIN_VA_START
7267 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
7269 #undef TARGET_SETUP_INCOMING_VARARGS
7270 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
7272 #undef TARGET_MACHINE_DEPENDENT_REORG
7273 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
7275 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
7276 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
7278 #undef TARGET_INIT_LIBFUNCS
7279 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
7281 #undef TARGET_RETURN_IN_MEMORY
7282 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
7284 #undef TARGET_ENCODE_SECTION_INFO
7285 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
7287 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
7288 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
7290 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
7291 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
7293 #undef TARGET_VECTORIZE_INIT_COST
7294 #define TARGET_VECTORIZE_INIT_COST spu_init_cost
7296 #undef TARGET_VECTORIZE_ADD_STMT_COST
7297 #define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost
7299 #undef TARGET_VECTORIZE_FINISH_COST
7300 #define TARGET_VECTORIZE_FINISH_COST spu_finish_cost
7302 #undef TARGET_VECTORIZE_DESTROY_COST_DATA
7303 #define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data
7305 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
7306 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
7308 #undef TARGET_LIBGCC_CMP_RETURN_MODE
7309 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
7311 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
7312 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
7314 #undef TARGET_SCHED_SMS_RES_MII
7315 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
7317 #undef TARGET_SECTION_TYPE_FLAGS
7318 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
7320 #undef TARGET_ASM_SELECT_SECTION
7321 #define TARGET_ASM_SELECT_SECTION spu_select_section
7323 #undef TARGET_ASM_UNIQUE_SECTION
7324 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
7326 #undef TARGET_LEGITIMATE_ADDRESS_P
7327 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
7329 #undef TARGET_LEGITIMATE_CONSTANT_P
7330 #define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p
7332 #undef TARGET_TRAMPOLINE_INIT
7333 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
7335 #undef TARGET_WARN_FUNC_RETURN
7336 #define TARGET_WARN_FUNC_RETURN spu_warn_func_return
7338 #undef TARGET_OPTION_OVERRIDE
7339 #define TARGET_OPTION_OVERRIDE spu_option_override
7341 #undef TARGET_CONDITIONAL_REGISTER_USAGE
7342 #define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage
7344 #undef TARGET_REF_MAY_ALIAS_ERRNO
7345 #define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno
7347 #undef TARGET_ASM_OUTPUT_MI_THUNK
7348 #define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
7349 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
7350 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
7352 /* Variable tracking should be run after all optimizations which
7353 change order of insns. It also needs a valid CFG. */
7354 #undef TARGET_DELAY_VARTRACK
7355 #define TARGET_DELAY_VARTRACK true
7357 #undef TARGET_CANONICALIZE_COMPARISON
7358 #define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison
7360 #undef TARGET_CAN_USE_DOLOOP_P
7361 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
/* Instantiate the target hook vector; TARGET_INITIALIZER picks up the
   TARGET_* macro overrides defined above.  */
7363 struct gcc_target targetm
= TARGET_INITIALIZER
;