1 /* Copyright (C) 2006-2015 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
33 #include "fold-const.h"
34 #include "stringpool.h"
35 #include "stor-layout.h"
45 #include "insn-codes.h"
50 #include "dominance.h"
56 #include "cfgcleanup.h"
57 #include "basic-block.h"
58 #include "diagnostic-core.h"
61 #include "langhooks.h"
63 #include "sched-int.h"
65 #include "tree-ssa-alias.h"
66 #include "internal-fn.h"
67 #include "gimple-fold.h"
69 #include "gimple-expr.h"
72 #include "tm-constrs.h"
82 #include "target-def.h"
84 /* Builtin types, data and prototypes. */
86 enum spu_builtin_type_index
88 SPU_BTI_END_OF_PARAMS
,
90 /* We create new type nodes for these. */
102 /* A 16-byte type. (Implemented with V16QI_type_node) */
105 /* These all correspond to intSI_type_node */
119 /* These correspond to the standard types */
139 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
140 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
141 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
142 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
143 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
144 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
145 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
146 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
147 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
148 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
150 static GTY(()) tree spu_builtin_types
[SPU_BTI_MAX
];
152 struct spu_builtin_range
157 static struct spu_builtin_range spu_builtin_range
[] = {
158 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
159 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
160 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
161 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
162 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
163 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
164 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
165 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
166 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
167 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
168 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
169 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
173 /* Target specific attribute specifications. */
174 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
176 /* Prototypes and external defs. */
177 static int get_pipe (rtx_insn
*insn
);
178 static int spu_naked_function_p (tree func
);
179 static int mem_is_padded_component_ref (rtx x
);
180 static void fix_range (const char *);
181 static rtx
spu_expand_load (rtx
, rtx
, rtx
, int);
183 /* Which instruction set architecture to use. */
185 /* Which cpu are we tuning for. */
188 /* The hardware requires 8 insns between a hint and the branch it
189 affects. This variable describes how many rtl instructions the
190 compiler needs to see before inserting a hint, and then the compiler
191 will insert enough nops to make it at least 8 insns. The default is
192 for the compiler to allow up to 2 nops be emitted. The nops are
193 inserted in pairs, so we round down. */
194 int spu_hint_dist
= (8*4) - (2*4);
209 IC_POOL
, /* constant pool */
210 IC_IL1
, /* one il* instruction */
211 IC_IL2
, /* both ilhu and iohl instructions */
212 IC_IL1s
, /* one il* instruction */
213 IC_IL2s
, /* both ilhu and iohl instructions */
214 IC_FSMBI
, /* the fsmbi instruction */
215 IC_CPAT
, /* one of the c*d instructions */
216 IC_FSMBI2
/* fsmbi plus 1 other instruction */
219 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
220 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
221 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
222 static enum immediate_class
classify_immediate (rtx op
,
225 /* Pointer mode for __ea references. */
226 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
229 /* Define the structure for the machine field in struct function. */
230 struct GTY(()) machine_function
232 /* Register to use for PIC accesses. */
236 /* How to allocate a 'struct machine_function'. */
237 static struct machine_function
*
238 spu_init_machine_status (void)
240 return ggc_cleared_alloc
<machine_function
> ();
243 /* Implement TARGET_OPTION_OVERRIDE. */
245 spu_option_override (void)
247 /* Set up function hooks. */
248 init_machine_status
= spu_init_machine_status
;
250 /* Small loops will be unpeeled at -O3. For SPU it is more important
251 to keep code small by default. */
252 if (!flag_unroll_loops
&& !flag_peel_loops
)
253 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES
, 4,
254 global_options
.x_param_values
,
255 global_options_set
.x_param_values
);
257 flag_omit_frame_pointer
= 1;
259 /* Functions must be 8 byte aligned so we correctly handle dual issue */
260 if (align_functions
< 8)
263 spu_hint_dist
= 8*4 - spu_max_nops
*4;
264 if (spu_hint_dist
< 0)
267 if (spu_fixed_range_string
)
268 fix_range (spu_fixed_range_string
);
270 /* Determine processor architectural level. */
273 if (strcmp (&spu_arch_string
[0], "cell") == 0)
274 spu_arch
= PROCESSOR_CELL
;
275 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
276 spu_arch
= PROCESSOR_CELLEDP
;
278 error ("bad value (%s) for -march= switch", spu_arch_string
);
281 /* Determine processor to tune for. */
284 if (strcmp (&spu_tune_string
[0], "cell") == 0)
285 spu_tune
= PROCESSOR_CELL
;
286 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
287 spu_tune
= PROCESSOR_CELLEDP
;
289 error ("bad value (%s) for -mtune= switch", spu_tune_string
);
292 /* Change defaults according to the processor architecture. */
293 if (spu_arch
== PROCESSOR_CELLEDP
)
295 /* If no command line option has been otherwise specified, change
296 the default to -mno-safe-hints on celledp -- only the original
297 Cell/B.E. processors require this workaround. */
298 if (!(target_flags_explicit
& MASK_SAFE_HINTS
))
299 target_flags
&= ~MASK_SAFE_HINTS
;
302 REAL_MODE_FORMAT (SFmode
) = &spu_single_format
;
305 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
306 struct attribute_spec.handler. */
308 /* True if MODE is valid for the target. By "valid", we mean able to
309 be manipulated in non-trivial ways. In particular, this means all
310 the arithmetic is supported. */
312 spu_scalar_mode_supported_p (machine_mode mode
)
330 /* Similarly for vector modes. "Supported" here is less strict. At
331 least some operations are supported; need to check optabs or builtins
332 for further details. */
334 spu_vector_mode_supported_p (machine_mode mode
)
351 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
352 least significant bytes of the outer mode. This function returns
353 TRUE for the SUBREG's where this is correct. */
355 valid_subreg (rtx op
)
357 machine_mode om
= GET_MODE (op
);
358 machine_mode im
= GET_MODE (SUBREG_REG (op
));
359 return om
!= VOIDmode
&& im
!= VOIDmode
360 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
361 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4)
362 || (GET_MODE_SIZE (im
) >= 16 && GET_MODE_SIZE (om
) >= 16));
365 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
366 and adjust the start offset. */
368 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
372 /* Strip any paradoxical SUBREG. */
373 if (GET_CODE (op
) == SUBREG
374 && (GET_MODE_BITSIZE (GET_MODE (op
))
375 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)))))
379 GET_MODE_BITSIZE (GET_MODE (op
)) -
380 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
381 op
= SUBREG_REG (op
);
383 /* If it is smaller than SI, assure a SUBREG */
384 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
388 *start
+= 32 - op_size
;
391 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
392 mode
= mode_for_size (op_size
, MODE_INT
, 0);
393 if (mode
!= GET_MODE (op
))
394 op
= gen_rtx_SUBREG (mode
, op
, 0);
399 spu_expand_extv (rtx ops
[], int unsignedp
)
401 rtx dst
= ops
[0], src
= ops
[1];
402 HOST_WIDE_INT width
= INTVAL (ops
[2]);
403 HOST_WIDE_INT start
= INTVAL (ops
[3]);
404 HOST_WIDE_INT align_mask
;
405 rtx s0
, s1
, mask
, r0
;
407 gcc_assert (REG_P (dst
) && GET_MODE (dst
) == TImode
);
411 /* First, determine if we need 1 TImode load or 2. We need only 1
412 if the bits being extracted do not cross the alignment boundary
413 as determined by the MEM and its address. */
415 align_mask
= -MEM_ALIGN (src
);
416 if ((start
& align_mask
) == ((start
+ width
- 1) & align_mask
))
418 /* Alignment is sufficient for 1 load. */
419 s0
= gen_reg_rtx (TImode
);
420 r0
= spu_expand_load (s0
, 0, src
, start
/ 8);
423 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
428 s0
= gen_reg_rtx (TImode
);
429 s1
= gen_reg_rtx (TImode
);
430 r0
= spu_expand_load (s0
, s1
, src
, start
/ 8);
433 gcc_assert (start
+ width
<= 128);
436 rtx r1
= gen_reg_rtx (SImode
);
437 mask
= gen_reg_rtx (TImode
);
438 emit_move_insn (mask
, GEN_INT (-1));
439 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
440 emit_insn (gen_rotqby_ti (s1
, s1
, r0
));
441 if (GET_CODE (r0
) == CONST_INT
)
442 r1
= GEN_INT (INTVAL (r0
) & 15);
444 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (15)));
445 emit_insn (gen_shlqby_ti (mask
, mask
, r1
));
446 emit_insn (gen_selb (s0
, s1
, s0
, mask
));
451 else if (GET_CODE (src
) == SUBREG
)
453 rtx r
= SUBREG_REG (src
);
454 gcc_assert (REG_P (r
) && SCALAR_INT_MODE_P (GET_MODE (r
)));
455 s0
= gen_reg_rtx (TImode
);
456 if (GET_MODE_SIZE (GET_MODE (r
)) < GET_MODE_SIZE (TImode
))
457 emit_insn (gen_rtx_SET (s0
, gen_rtx_ZERO_EXTEND (TImode
, r
)));
459 emit_move_insn (s0
, src
);
463 gcc_assert (REG_P (src
) && GET_MODE (src
) == TImode
);
464 s0
= gen_reg_rtx (TImode
);
465 emit_move_insn (s0
, src
);
468 /* Now s0 is TImode and contains the bits to extract at start. */
471 emit_insn (gen_rotlti3 (s0
, s0
, GEN_INT (start
)));
474 s0
= expand_shift (RSHIFT_EXPR
, TImode
, s0
, 128 - width
, s0
, unsignedp
);
476 emit_move_insn (dst
, s0
);
480 spu_expand_insv (rtx ops
[])
482 HOST_WIDE_INT width
= INTVAL (ops
[1]);
483 HOST_WIDE_INT start
= INTVAL (ops
[2]);
484 HOST_WIDE_INT maskbits
;
485 machine_mode dst_mode
;
486 rtx dst
= ops
[0], src
= ops
[3];
493 if (GET_CODE (ops
[0]) == MEM
)
494 dst
= gen_reg_rtx (TImode
);
496 dst
= adjust_operand (dst
, &start
);
497 dst_mode
= GET_MODE (dst
);
498 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
500 if (CONSTANT_P (src
))
503 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
504 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
506 src
= adjust_operand (src
, 0);
508 mask
= gen_reg_rtx (dst_mode
);
509 shift_reg
= gen_reg_rtx (dst_mode
);
510 shift
= dst_size
- start
- width
;
512 /* It's not safe to use subreg here because the compiler assumes
513 that the SUBREG_REG is right justified in the SUBREG. */
514 convert_move (shift_reg
, src
, 1);
521 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
524 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
527 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
539 maskbits
= (-1ll << (32 - width
- start
));
541 maskbits
+= (1ll << (32 - start
));
542 emit_move_insn (mask
, GEN_INT (maskbits
));
545 maskbits
= (-1ll << (64 - width
- start
));
547 maskbits
+= (1ll << (64 - start
));
548 emit_move_insn (mask
, GEN_INT (maskbits
));
552 unsigned char arr
[16];
554 memset (arr
, 0, sizeof (arr
));
555 arr
[i
] = 0xff >> (start
& 7);
556 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
558 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
559 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
565 if (GET_CODE (ops
[0]) == MEM
)
567 rtx low
= gen_reg_rtx (SImode
);
568 rtx rotl
= gen_reg_rtx (SImode
);
569 rtx mask0
= gen_reg_rtx (TImode
);
575 addr
= force_reg (Pmode
, XEXP (ops
[0], 0));
576 addr0
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
577 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
578 emit_insn (gen_negsi2 (rotl
, low
));
579 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
580 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
581 mem
= change_address (ops
[0], TImode
, addr0
);
582 set_mem_alias_set (mem
, 0);
583 emit_move_insn (dst
, mem
);
584 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
585 if (start
+ width
> MEM_ALIGN (ops
[0]))
587 rtx shl
= gen_reg_rtx (SImode
);
588 rtx mask1
= gen_reg_rtx (TImode
);
589 rtx dst1
= gen_reg_rtx (TImode
);
591 addr1
= plus_constant (Pmode
, addr
, 16);
592 addr1
= gen_rtx_AND (Pmode
, addr1
, GEN_INT (-16));
593 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
594 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
595 mem1
= change_address (ops
[0], TImode
, addr1
);
596 set_mem_alias_set (mem1
, 0);
597 emit_move_insn (dst1
, mem1
);
598 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
599 emit_move_insn (mem1
, dst1
);
601 emit_move_insn (mem
, dst
);
604 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
609 spu_expand_block_move (rtx ops
[])
611 HOST_WIDE_INT bytes
, align
, offset
;
612 rtx src
, dst
, sreg
, dreg
, target
;
614 if (GET_CODE (ops
[2]) != CONST_INT
615 || GET_CODE (ops
[3]) != CONST_INT
616 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
619 bytes
= INTVAL (ops
[2]);
620 align
= INTVAL (ops
[3]);
630 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
632 dst
= adjust_address (ops
[0], V16QImode
, offset
);
633 src
= adjust_address (ops
[1], V16QImode
, offset
);
634 emit_move_insn (dst
, src
);
639 unsigned char arr
[16] = { 0 };
640 for (i
= 0; i
< bytes
- offset
; i
++)
642 dst
= adjust_address (ops
[0], V16QImode
, offset
);
643 src
= adjust_address (ops
[1], V16QImode
, offset
);
644 mask
= gen_reg_rtx (V16QImode
);
645 sreg
= gen_reg_rtx (V16QImode
);
646 dreg
= gen_reg_rtx (V16QImode
);
647 target
= gen_reg_rtx (V16QImode
);
648 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
649 emit_move_insn (dreg
, dst
);
650 emit_move_insn (sreg
, src
);
651 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
652 emit_move_insn (dst
, target
);
660 { SPU_EQ
, SPU_GT
, SPU_GTU
};
662 int spu_comp_icode
[12][3] = {
663 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
664 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
665 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
666 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
667 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
668 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
669 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
670 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
671 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
672 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
673 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
674 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
677 /* Generate a compare for CODE. Return a brand-new rtx that represents
678 the result of the compare. GCC can figure this out too if we don't
679 provide all variations of compares, but GCC always wants to use
680 WORD_MODE, we can generate better code in most cases if we do it
683 spu_emit_branch_or_set (int is_set
, rtx cmp
, rtx operands
[])
685 int reverse_compare
= 0;
686 int reverse_test
= 0;
687 rtx compare_result
, eq_result
;
688 rtx comp_rtx
, eq_rtx
;
689 machine_mode comp_mode
;
690 machine_mode op_mode
;
691 enum spu_comp_code scode
, eq_code
;
692 enum insn_code ior_code
;
693 enum rtx_code code
= GET_CODE (cmp
);
694 rtx op0
= XEXP (cmp
, 0);
695 rtx op1
= XEXP (cmp
, 1);
699 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
700 and so on, to keep the constant in operand 1. */
701 if (GET_CODE (op1
) == CONST_INT
)
703 HOST_WIDE_INT val
= INTVAL (op1
) - 1;
704 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
728 /* However, if we generate an integer result, performing a reverse test
729 would require an extra negation, so avoid that where possible. */
730 if (GET_CODE (op1
) == CONST_INT
&& is_set
== 1)
732 HOST_WIDE_INT val
= INTVAL (op1
) + 1;
733 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
750 op_mode
= GET_MODE (op0
);
756 if (HONOR_NANS (op_mode
))
771 if (HONOR_NANS (op_mode
))
863 comp_mode
= V4SImode
;
867 comp_mode
= V2DImode
;
874 if (GET_MODE (op1
) == DFmode
875 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
878 if (is_set
== 0 && op1
== const0_rtx
879 && (GET_MODE (op0
) == SImode
880 || GET_MODE (op0
) == HImode
881 || GET_MODE (op0
) == QImode
) && scode
== SPU_EQ
)
883 /* Don't need to set a register with the result when we are
884 comparing against zero and branching. */
885 reverse_test
= !reverse_test
;
886 compare_result
= op0
;
890 compare_result
= gen_reg_rtx (comp_mode
);
899 if (spu_comp_icode
[index
][scode
] == 0)
902 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
904 op0
= force_reg (op_mode
, op0
);
905 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
907 op1
= force_reg (op_mode
, op1
);
908 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
912 emit_insn (comp_rtx
);
916 eq_result
= gen_reg_rtx (comp_mode
);
917 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
922 ior_code
= optab_handler (ior_optab
, comp_mode
);
923 gcc_assert (ior_code
!= CODE_FOR_nothing
);
924 emit_insn (GEN_FCN (ior_code
)
925 (compare_result
, compare_result
, eq_result
));
934 /* We don't have branch on QI compare insns, so we convert the
935 QI compare result to a HI result. */
936 if (comp_mode
== QImode
)
938 rtx old_res
= compare_result
;
939 compare_result
= gen_reg_rtx (HImode
);
941 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
945 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
947 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
949 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
950 emit_jump_insn (gen_rtx_SET (pc_rtx
,
951 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
954 else if (is_set
== 2)
956 rtx target
= operands
[0];
957 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
958 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
959 machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
961 rtx op_t
= operands
[2];
962 rtx op_f
= operands
[3];
964 /* The result of the comparison can be SI, HI or QI mode. Create a
965 mask based on that result. */
966 if (target_size
> compare_size
)
968 select_mask
= gen_reg_rtx (mode
);
969 emit_insn (gen_extend_compare (select_mask
, compare_result
));
971 else if (target_size
< compare_size
)
973 gen_rtx_SUBREG (mode
, compare_result
,
974 (compare_size
- target_size
) / BITS_PER_UNIT
);
975 else if (comp_mode
!= mode
)
976 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
978 select_mask
= compare_result
;
980 if (GET_MODE (target
) != GET_MODE (op_t
)
981 || GET_MODE (target
) != GET_MODE (op_f
))
985 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
987 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
991 rtx target
= operands
[0];
993 emit_insn (gen_rtx_SET (compare_result
,
994 gen_rtx_NOT (comp_mode
, compare_result
)));
995 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
996 emit_insn (gen_extendhisi2 (target
, compare_result
));
997 else if (GET_MODE (target
) == SImode
998 && GET_MODE (compare_result
) == QImode
)
999 emit_insn (gen_extend_compare (target
, compare_result
));
1001 emit_move_insn (target
, compare_result
);
1006 const_double_to_hwint (rtx x
)
1010 if (GET_MODE (x
) == SFmode
)
1012 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1013 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1015 else if (GET_MODE (x
) == DFmode
)
1018 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1019 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
1021 val
= (val
<< 32) | (l
[1] & 0xffffffff);
1029 hwint_to_const_double (machine_mode mode
, HOST_WIDE_INT v
)
1033 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1036 tv
[0] = (v
<< 32) >> 32;
1037 else if (mode
== DFmode
)
1039 tv
[1] = (v
<< 32) >> 32;
1042 real_from_target (&rv
, tv
, mode
);
1043 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
1047 print_operand_address (FILE * file
, register rtx addr
)
1052 if (GET_CODE (addr
) == AND
1053 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1054 && INTVAL (XEXP (addr
, 1)) == -16)
1055 addr
= XEXP (addr
, 0);
1057 switch (GET_CODE (addr
))
1060 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1064 reg
= XEXP (addr
, 0);
1065 offset
= XEXP (addr
, 1);
1066 if (GET_CODE (offset
) == REG
)
1068 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1069 reg_names
[REGNO (offset
)]);
1071 else if (GET_CODE (offset
) == CONST_INT
)
1073 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1074 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1084 output_addr_const (file
, addr
);
1094 print_operand (FILE * file
, rtx x
, int code
)
1096 machine_mode mode
= GET_MODE (x
);
1098 unsigned char arr
[16];
1099 int xcode
= GET_CODE (x
);
1101 if (GET_MODE (x
) == VOIDmode
)
1104 case 'L': /* 128 bits, signed */
1105 case 'm': /* 128 bits, signed */
1106 case 'T': /* 128 bits, signed */
1107 case 't': /* 128 bits, signed */
1110 case 'K': /* 64 bits, signed */
1111 case 'k': /* 64 bits, signed */
1112 case 'D': /* 64 bits, signed */
1113 case 'd': /* 64 bits, signed */
1116 case 'J': /* 32 bits, signed */
1117 case 'j': /* 32 bits, signed */
1118 case 's': /* 32 bits, signed */
1119 case 'S': /* 32 bits, signed */
1126 case 'j': /* 32 bits, signed */
1127 case 'k': /* 64 bits, signed */
1128 case 'm': /* 128 bits, signed */
1129 if (xcode
== CONST_INT
1130 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1132 gcc_assert (logical_immediate_p (x
, mode
));
1133 constant_to_array (mode
, x
, arr
);
1134 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1135 val
= trunc_int_for_mode (val
, SImode
);
1136 switch (which_logical_immediate (val
))
1141 fprintf (file
, "h");
1144 fprintf (file
, "b");
1154 case 'J': /* 32 bits, signed */
1155 case 'K': /* 64 bits, signed */
1156 case 'L': /* 128 bits, signed */
1157 if (xcode
== CONST_INT
1158 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1160 gcc_assert (logical_immediate_p (x
, mode
)
1161 || iohl_immediate_p (x
, mode
));
1162 constant_to_array (mode
, x
, arr
);
1163 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1164 val
= trunc_int_for_mode (val
, SImode
);
1165 switch (which_logical_immediate (val
))
1171 val
= trunc_int_for_mode (val
, HImode
);
1174 val
= trunc_int_for_mode (val
, QImode
);
1179 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1185 case 't': /* 128 bits, signed */
1186 case 'd': /* 64 bits, signed */
1187 case 's': /* 32 bits, signed */
1190 enum immediate_class c
= classify_immediate (x
, mode
);
1194 constant_to_array (mode
, x
, arr
);
1195 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1196 val
= trunc_int_for_mode (val
, SImode
);
1197 switch (which_immediate_load (val
))
1202 fprintf (file
, "a");
1205 fprintf (file
, "h");
1208 fprintf (file
, "hu");
1215 constant_to_array (mode
, x
, arr
);
1216 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1218 fprintf (file
, "b");
1220 fprintf (file
, "h");
1222 fprintf (file
, "w");
1224 fprintf (file
, "d");
1227 if (xcode
== CONST_VECTOR
)
1229 x
= CONST_VECTOR_ELT (x
, 0);
1230 xcode
= GET_CODE (x
);
1232 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1233 fprintf (file
, "a");
1234 else if (xcode
== HIGH
)
1235 fprintf (file
, "hu");
1249 case 'T': /* 128 bits, signed */
1250 case 'D': /* 64 bits, signed */
1251 case 'S': /* 32 bits, signed */
1254 enum immediate_class c
= classify_immediate (x
, mode
);
1258 constant_to_array (mode
, x
, arr
);
1259 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1260 val
= trunc_int_for_mode (val
, SImode
);
1261 switch (which_immediate_load (val
))
1268 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1273 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1276 constant_to_array (mode
, x
, arr
);
1278 for (i
= 0; i
< 16; i
++)
1283 print_operand (file
, GEN_INT (val
), 0);
1286 constant_to_array (mode
, x
, arr
);
1287 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1288 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1293 if (GET_CODE (x
) == CONST_VECTOR
)
1294 x
= CONST_VECTOR_ELT (x
, 0);
1295 output_addr_const (file
, x
);
1297 fprintf (file
, "@h");
1311 if (xcode
== CONST_INT
)
1313 /* Only 4 least significant bits are relevant for generate
1314 control word instructions. */
1315 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1320 case 'M': /* print code for c*d */
1321 if (GET_CODE (x
) == CONST_INT
)
1325 fprintf (file
, "b");
1328 fprintf (file
, "h");
1331 fprintf (file
, "w");
1334 fprintf (file
, "d");
1343 case 'N': /* Negate the operand */
1344 if (xcode
== CONST_INT
)
1345 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1346 else if (xcode
== CONST_VECTOR
)
1347 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1348 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1351 case 'I': /* enable/disable interrupts */
1352 if (xcode
== CONST_INT
)
1353 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1356 case 'b': /* branch modifiers */
1358 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1359 else if (COMPARISON_P (x
))
1360 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1363 case 'i': /* indirect call */
1366 if (GET_CODE (XEXP (x
, 0)) == REG
)
1367 /* Used in indirect function calls. */
1368 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1370 output_address (XEXP (x
, 0));
1374 case 'p': /* load/store */
1378 xcode
= GET_CODE (x
);
1383 xcode
= GET_CODE (x
);
1386 fprintf (file
, "d");
1387 else if (xcode
== CONST_INT
)
1388 fprintf (file
, "a");
1389 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1390 fprintf (file
, "r");
1391 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1393 if (GET_CODE (XEXP (x
, 1)) == REG
)
1394 fprintf (file
, "x");
1396 fprintf (file
, "d");
1401 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1403 output_addr_const (file
, GEN_INT (val
));
1407 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1409 output_addr_const (file
, GEN_INT (val
));
1413 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1415 output_addr_const (file
, GEN_INT (val
));
1419 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1420 val
= (val
>> 3) & 0x1f;
1421 output_addr_const (file
, GEN_INT (val
));
1425 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1428 output_addr_const (file
, GEN_INT (val
));
1432 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1435 output_addr_const (file
, GEN_INT (val
));
1439 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1442 output_addr_const (file
, GEN_INT (val
));
1446 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1447 val
= -(val
& -8ll);
1448 val
= (val
>> 3) & 0x1f;
1449 output_addr_const (file
, GEN_INT (val
));
1454 constant_to_array (mode
, x
, arr
);
1455 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1456 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1461 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1462 else if (xcode
== MEM
)
1463 output_address (XEXP (x
, 0));
1464 else if (xcode
== CONST_VECTOR
)
1465 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1467 output_addr_const (file
, x
);
1474 output_operand_lossage ("invalid %%xn code");
1479 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1480 caller saved register. For leaf functions it is more efficient to
1481 use a volatile register because we won't need to save and restore the
1482 pic register. This routine is only valid after register allocation
1483 is completed, so we can pick an unused register. */
1487 if (!reload_completed
&& !reload_in_progress
)
1490 /* If we've already made the decision, we need to keep with it. Once we've
1491 decided to use LAST_ARG_REGNUM, future calls to df_regs_ever_live_p may
1492 return true since the register is now live; this should not cause us to
1493 "switch back" to using pic_offset_table_rtx. */
1494 if (!cfun
->machine
->pic_reg
)
1496 if (crtl
->is_leaf
&& !df_regs_ever_live_p (LAST_ARG_REGNUM
))
1497 cfun
->machine
->pic_reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
1499 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1502 return cfun
->machine
->pic_reg
;
1505 /* Split constant addresses to handle cases that are too large.
1506 Add in the pic register when in PIC mode.
1507 Split immediates that require more than 1 instruction. */
1509 spu_split_immediate (rtx
* ops
)
1511 machine_mode mode
= GET_MODE (ops
[0]);
1512 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1518 unsigned char arrhi
[16];
1519 unsigned char arrlo
[16];
1520 rtx to
, temp
, hi
, lo
;
1522 machine_mode imode
= mode
;
1523 /* We need to do reals as ints because the constant used in the
1524 IOR might not be a legitimate real constant. */
1525 imode
= int_mode_for_mode (mode
);
1526 constant_to_array (mode
, ops
[1], arrhi
);
1528 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1531 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1532 for (i
= 0; i
< 16; i
+= 4)
1534 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1535 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1536 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1537 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1539 hi
= array_to_constant (imode
, arrhi
);
1540 lo
= array_to_constant (imode
, arrlo
);
1541 emit_move_insn (temp
, hi
);
1542 emit_insn (gen_rtx_SET (to
, gen_rtx_IOR (imode
, temp
, lo
)));
1547 unsigned char arr_fsmbi
[16];
1548 unsigned char arr_andbi
[16];
1549 rtx to
, reg_fsmbi
, reg_and
;
1551 machine_mode imode
= mode
;
1552 /* We need to do reals as ints because the constant used in the
1553 * AND might not be a legitimate real constant. */
1554 imode
= int_mode_for_mode (mode
);
1555 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1557 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1560 for (i
= 0; i
< 16; i
++)
1561 if (arr_fsmbi
[i
] != 0)
1563 arr_andbi
[0] = arr_fsmbi
[i
];
1564 arr_fsmbi
[i
] = 0xff;
1566 for (i
= 1; i
< 16; i
++)
1567 arr_andbi
[i
] = arr_andbi
[0];
1568 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1569 reg_and
= array_to_constant (imode
, arr_andbi
);
1570 emit_move_insn (to
, reg_fsmbi
);
1571 emit_insn (gen_rtx_SET (to
, gen_rtx_AND (imode
, to
, reg_and
)));
1575 if (reload_in_progress
|| reload_completed
)
1577 rtx mem
= force_const_mem (mode
, ops
[1]);
1578 if (TARGET_LARGE_MEM
)
1580 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1581 emit_move_insn (addr
, XEXP (mem
, 0));
1582 mem
= replace_equiv_address (mem
, addr
);
1584 emit_move_insn (ops
[0], mem
);
1590 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1594 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1595 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1598 emit_insn (gen_pic (ops
[0], ops
[1]));
1601 rtx pic_reg
= get_pic_reg ();
1602 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1604 return flag_pic
|| c
== IC_IL2s
;
1615 /* SAVING is TRUE when we are generating the actual load and store
1616 instructions for REGNO. When determining the size of the stack
1617 needed for saving register we must allocate enough space for the
1618 worst case, because we don't always have the information early enough
1619 to not allocate it. But we can at least eliminate the actual loads
1620 and stores during the prologue/epilogue. */
1622 need_to_save_reg (int regno
, int saving
)
1624 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1627 && regno
== PIC_OFFSET_TABLE_REGNUM
1628 && (!saving
|| cfun
->machine
->pic_reg
== pic_offset_table_rtx
))
1633 /* This function is only correct starting with local register
1636 spu_saved_regs_size (void)
1638 int reg_save_size
= 0;
1641 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1642 if (need_to_save_reg (regno
, 0))
1643 reg_save_size
+= 0x10;
1644 return reg_save_size
;
1648 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1650 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1652 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1653 return emit_insn (gen_movv4si (mem
, reg
));
1657 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1659 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1661 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1662 return emit_insn (gen_movv4si (reg
, mem
));
1665 /* This happens after reload, so we need to expand it. */
1667 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1670 if (satisfies_constraint_K (GEN_INT (imm
)))
1672 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1676 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1677 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1678 if (REGNO (src
) == REGNO (scratch
))
1684 /* Return nonzero if this function is known to have a null epilogue. */
1687 direct_return (void)
1689 if (reload_completed
)
1691 if (cfun
->static_chain_decl
== 0
1692 && (spu_saved_regs_size ()
1694 + crtl
->outgoing_args_size
1695 + crtl
->args
.pretend_args_size
== 0)
1703 The stack frame looks like this:
1707 AP -> +-------------+
1710 prev SP | back chain |
1713 | reg save | crtl->args.pretend_args_size bytes
1716 | saved regs | spu_saved_regs_size() bytes
1717 FP -> +-------------+
1719 | vars | get_frame_size() bytes
1720 HFP -> +-------------+
1723 | args | crtl->outgoing_args_size bytes
1729 SP -> +-------------+
1733 spu_expand_prologue (void)
1735 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1736 HOST_WIDE_INT total_size
;
1737 HOST_WIDE_INT saved_regs_size
;
1738 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1739 rtx scratch_reg_0
, scratch_reg_1
;
1743 if (flag_pic
&& optimize
== 0 && !cfun
->machine
->pic_reg
)
1744 cfun
->machine
->pic_reg
= pic_offset_table_rtx
;
1746 if (spu_naked_function_p (current_function_decl
))
1749 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1750 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1752 saved_regs_size
= spu_saved_regs_size ();
1753 total_size
= size
+ saved_regs_size
1754 + crtl
->outgoing_args_size
1755 + crtl
->args
.pretend_args_size
;
1758 || cfun
->calls_alloca
|| total_size
> 0)
1759 total_size
+= STACK_POINTER_OFFSET
;
1761 /* Save this first because code after this might use the link
1762 register as a scratch register. */
1765 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1766 RTX_FRAME_RELATED_P (insn
) = 1;
1771 offset
= -crtl
->args
.pretend_args_size
;
1772 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1773 if (need_to_save_reg (regno
, 1))
1776 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1777 RTX_FRAME_RELATED_P (insn
) = 1;
1781 if (flag_pic
&& cfun
->machine
->pic_reg
)
1783 rtx pic_reg
= cfun
->machine
->pic_reg
;
1784 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1785 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1790 if (flag_stack_check
)
1792 /* We compare against total_size-1 because
1793 ($sp >= total_size) <=> ($sp > total_size-1) */
1794 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1795 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1796 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1797 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1799 emit_move_insn (scratch_v4si
, size_v4si
);
1800 size_v4si
= scratch_v4si
;
1802 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1803 emit_insn (gen_vec_extractv4si
1804 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1805 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1808 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1809 the value of the previous $sp because we save it as the back
1811 if (total_size
<= 2000)
1813 /* In this case we save the back chain first. */
1814 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1816 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1820 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1822 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1824 RTX_FRAME_RELATED_P (insn
) = 1;
1825 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1826 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1828 if (total_size
> 2000)
1830 /* Save the back chain ptr */
1831 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1834 if (frame_pointer_needed
)
1836 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1837 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1838 + crtl
->outgoing_args_size
;
1839 /* Set the new frame_pointer */
1840 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1841 RTX_FRAME_RELATED_P (insn
) = 1;
1842 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1843 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1844 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1848 if (flag_stack_usage_info
)
1849 current_function_static_stack_size
= total_size
;
1853 spu_expand_epilogue (bool sibcall_p
)
1855 int size
= get_frame_size (), offset
, regno
;
1856 HOST_WIDE_INT saved_regs_size
, total_size
;
1857 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1860 if (spu_naked_function_p (current_function_decl
))
1863 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1865 saved_regs_size
= spu_saved_regs_size ();
1866 total_size
= size
+ saved_regs_size
1867 + crtl
->outgoing_args_size
1868 + crtl
->args
.pretend_args_size
;
1871 || cfun
->calls_alloca
|| total_size
> 0)
1872 total_size
+= STACK_POINTER_OFFSET
;
1876 if (cfun
->calls_alloca
)
1877 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1879 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1882 if (saved_regs_size
> 0)
1884 offset
= -crtl
->args
.pretend_args_size
;
1885 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1886 if (need_to_save_reg (regno
, 1))
1889 frame_emit_load (regno
, sp_reg
, offset
);
1895 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1899 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
1900 emit_jump_insn (gen__return ());
1905 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1909 /* This is inefficient because it ends up copying to a save-register
1910 which then gets saved even though $lr has already been saved. But
1911 it does generate better code for leaf functions and we don't need
1912 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1913 used for __builtin_return_address anyway, so maybe we don't care if
1914 it's inefficient. */
1915 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
1919 /* Given VAL, generate a constant appropriate for MODE.
1920 If MODE is a vector mode, every element will be VAL.
1921 For TImode, VAL will be zero extended to 128 bits. */
1923 spu_const (machine_mode mode
, HOST_WIDE_INT val
)
1929 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
1930 || GET_MODE_CLASS (mode
) == MODE_FLOAT
1931 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1932 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1934 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1935 return immed_double_const (val
, 0, mode
);
1937 /* val is the bit representation of the float */
1938 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1939 return hwint_to_const_double (mode
, val
);
1941 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
1942 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
1944 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
1946 units
= GET_MODE_NUNITS (mode
);
1948 v
= rtvec_alloc (units
);
1950 for (i
= 0; i
< units
; ++i
)
1951 RTVEC_ELT (v
, i
) = inner
;
1953 return gen_rtx_CONST_VECTOR (mode
, v
);
1956 /* Create a MODE vector constant from 4 ints. */
1958 spu_const_from_ints(machine_mode mode
, int a
, int b
, int c
, int d
)
1960 unsigned char arr
[16];
1961 arr
[0] = (a
>> 24) & 0xff;
1962 arr
[1] = (a
>> 16) & 0xff;
1963 arr
[2] = (a
>> 8) & 0xff;
1964 arr
[3] = (a
>> 0) & 0xff;
1965 arr
[4] = (b
>> 24) & 0xff;
1966 arr
[5] = (b
>> 16) & 0xff;
1967 arr
[6] = (b
>> 8) & 0xff;
1968 arr
[7] = (b
>> 0) & 0xff;
1969 arr
[8] = (c
>> 24) & 0xff;
1970 arr
[9] = (c
>> 16) & 0xff;
1971 arr
[10] = (c
>> 8) & 0xff;
1972 arr
[11] = (c
>> 0) & 0xff;
1973 arr
[12] = (d
>> 24) & 0xff;
1974 arr
[13] = (d
>> 16) & 0xff;
1975 arr
[14] = (d
>> 8) & 0xff;
1976 arr
[15] = (d
>> 0) & 0xff;
1977 return array_to_constant(mode
, arr
);
1980 /* branch hint stuff */
1982 /* An array of these is used to propagate hints to predecessor blocks. */
1985 rtx_insn
*prop_jump
; /* propagated from another block */
1986 int bb_index
; /* the original block. */
1988 static struct spu_bb_info
*spu_bb_info
;
1990 #define STOP_HINT_P(INSN) \
1992 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
1993 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
1995 /* 1 when RTX is a hinted branch or its target. We keep track of
1996 what has been hinted so the safe-hint code can test it easily. */
1997 #define HINTED_P(RTX) \
1998 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2000 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2001 #define SCHED_ON_EVEN_P(RTX) \
2002 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2004 /* Emit a nop for INSN such that the two will dual issue. This assumes
2005 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2006 We check for TImode to handle a MULTI1 insn which has dual issued its
2007 first instruction. get_pipe returns -1 for MULTI0 or inline asm. */
2009 emit_nop_for_insn (rtx_insn
*insn
)
2014 /* We need to handle JUMP_TABLE_DATA separately. */
2015 if (JUMP_TABLE_DATA_P (insn
))
2017 new_insn
= emit_insn_after (gen_lnop(), insn
);
2018 recog_memoized (new_insn
);
2019 INSN_LOCATION (new_insn
) = UNKNOWN_LOCATION
;
2023 p
= get_pipe (insn
);
2024 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2025 new_insn
= emit_insn_after (gen_lnop (), insn
);
2026 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2028 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2029 PUT_MODE (new_insn
, TImode
);
2030 PUT_MODE (insn
, VOIDmode
);
2033 new_insn
= emit_insn_after (gen_lnop (), insn
);
2034 recog_memoized (new_insn
);
2035 INSN_LOCATION (new_insn
) = INSN_LOCATION (insn
);
2038 /* Insert nops in basic blocks to meet dual issue alignment
2039 requirements. Also make sure hbrp and hint instructions are at least
2040 one cycle apart, possibly inserting a nop. */
2044 rtx_insn
*insn
, *next_insn
, *prev_insn
, *hbr_insn
= 0;
2048 /* This sets up INSN_ADDRESSES. */
2049 shorten_branches (get_insns ());
2051 /* Keep track of length added by nops. */
2055 insn
= get_insns ();
2056 if (!active_insn_p (insn
))
2057 insn
= next_active_insn (insn
);
2058 for (; insn
; insn
= next_insn
)
2060 next_insn
= next_active_insn (insn
);
2061 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2062 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2066 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2067 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2068 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2071 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2072 PUT_MODE (prev_insn
, GET_MODE (insn
));
2073 PUT_MODE (insn
, TImode
);
2074 INSN_LOCATION (prev_insn
) = INSN_LOCATION (insn
);
2080 if (INSN_CODE (insn
) == CODE_FOR_blockage
&& next_insn
)
2082 if (GET_MODE (insn
) == TImode
)
2083 PUT_MODE (next_insn
, TImode
);
2085 next_insn
= next_active_insn (insn
);
2087 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2088 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2090 if (((addr
+ length
) & 7) != 0)
2092 emit_nop_for_insn (prev_insn
);
2096 else if (GET_MODE (insn
) == TImode
2097 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2098 || get_attr_type (insn
) == TYPE_MULTI0
)
2099 && ((addr
+ length
) & 7) != 0)
2101 /* prev_insn will always be set because the first insn is
2102 always 8-byte aligned. */
2103 emit_nop_for_insn (prev_insn
);
2111 /* Routines for branch hints. */
2114 spu_emit_branch_hint (rtx_insn
*before
, rtx_insn
*branch
, rtx target
,
2115 int distance
, sbitmap blocks
)
2117 rtx branch_label
= 0;
2120 rtx_jump_table_data
*table
;
2122 if (before
== 0 || branch
== 0 || target
== 0)
2125 /* While scheduling we require hints to be no further than 600, so
2126 we need to enforce that here too */
2130 /* If we have a Basic block note, emit it after the basic block note. */
2131 if (NOTE_INSN_BASIC_BLOCK_P (before
))
2132 before
= NEXT_INSN (before
);
2134 branch_label
= gen_label_rtx ();
2135 LABEL_NUSES (branch_label
)++;
2136 LABEL_PRESERVE_P (branch_label
) = 1;
2137 insn
= emit_label_before (branch_label
, branch
);
2138 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2139 bitmap_set_bit (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2141 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2142 recog_memoized (hint
);
2143 INSN_LOCATION (hint
) = INSN_LOCATION (branch
);
2144 HINTED_P (branch
) = 1;
2146 if (GET_CODE (target
) == LABEL_REF
)
2147 HINTED_P (XEXP (target
, 0)) = 1;
2148 else if (tablejump_p (branch
, 0, &table
))
2152 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2153 vec
= XVEC (PATTERN (table
), 0);
2155 vec
= XVEC (PATTERN (table
), 1);
2156 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2157 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2160 if (distance
>= 588)
2162 /* Make sure the hint isn't scheduled any earlier than this point,
2163 which could make it too far for the branch offest to fit */
2164 insn
= emit_insn_before (gen_blockage (), hint
);
2165 recog_memoized (insn
);
2166 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2168 else if (distance
<= 8 * 4)
2170 /* To guarantee at least 8 insns between the hint and branch we
2173 for (d
= distance
; d
< 8 * 4; d
+= 4)
2176 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2177 recog_memoized (insn
);
2178 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2181 /* Make sure any nops inserted aren't scheduled before the hint. */
2182 insn
= emit_insn_after (gen_blockage (), hint
);
2183 recog_memoized (insn
);
2184 INSN_LOCATION (insn
) = INSN_LOCATION (hint
);
2186 /* Make sure any nops inserted aren't scheduled after the call. */
2187 if (CALL_P (branch
) && distance
< 8 * 4)
2189 insn
= emit_insn_before (gen_blockage (), branch
);
2190 recog_memoized (insn
);
2191 INSN_LOCATION (insn
) = INSN_LOCATION (branch
);
2196 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2197 the rtx for the branch target. */
2199 get_branch_target (rtx_insn
*branch
)
2201 if (JUMP_P (branch
))
2205 /* Return statements */
2206 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2207 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2210 if (extract_asm_operands (PATTERN (branch
)) != NULL
)
2213 set
= single_set (branch
);
2214 src
= SET_SRC (set
);
2215 if (GET_CODE (SET_DEST (set
)) != PC
)
2218 if (GET_CODE (src
) == IF_THEN_ELSE
)
2221 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2224 /* If the more probable case is not a fall through, then
2225 try a branch hint. */
2226 int prob
= XINT (note
, 0);
2227 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2228 && GET_CODE (XEXP (src
, 1)) != PC
)
2229 lab
= XEXP (src
, 1);
2230 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2231 && GET_CODE (XEXP (src
, 2)) != PC
)
2232 lab
= XEXP (src
, 2);
2236 if (GET_CODE (lab
) == RETURN
)
2237 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2245 else if (CALL_P (branch
))
2248 /* All of our call patterns are in a PARALLEL and the CALL is
2249 the first pattern in the PARALLEL. */
2250 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2252 call
= XVECEXP (PATTERN (branch
), 0, 0);
2253 if (GET_CODE (call
) == SET
)
2254 call
= SET_SRC (call
);
2255 if (GET_CODE (call
) != CALL
)
2257 return XEXP (XEXP (call
, 0), 0);
2262 /* The special $hbr register is used to prevent the insn scheduler from
2263 moving hbr insns across instructions which invalidate them. It
2264 should only be used in a clobber, and this function searches for
2265 insns which clobber it. */
2267 insn_clobbers_hbr (rtx_insn
*insn
)
2270 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2272 rtx parallel
= PATTERN (insn
);
2275 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2277 clobber
= XVECEXP (parallel
, 0, j
);
2278 if (GET_CODE (clobber
) == CLOBBER
2279 && GET_CODE (XEXP (clobber
, 0)) == REG
2280 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2287 /* Search up to 32 insns starting at FIRST:
2288 - at any kind of hinted branch, just return
2289 - at any unconditional branch in the first 15 insns, just return
2290 - at a call or indirect branch, after the first 15 insns, force it to
2291 an even address and return
2292 - at any unconditional branch, after the first 15 insns, force it to
2294 At then end of the search, insert an hbrp within 4 insns of FIRST,
2295 and an hbrp within 16 instructions of FIRST.
2298 insert_hbrp_for_ilb_runout (rtx_insn
*first
)
2300 rtx_insn
*insn
, *before_4
= 0, *before_16
= 0;
2301 int addr
= 0, length
, first_addr
= -1;
2302 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2303 int insert_lnop_after
= 0;
2304 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2307 if (first_addr
== -1)
2308 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2309 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2310 length
= get_attr_length (insn
);
2312 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2314 /* We test for 14 instructions because the first hbrp will add
2315 up to 2 instructions. */
2316 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2319 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2321 /* Make sure an hbrp is at least 2 cycles away from a hint.
2322 Insert an lnop after the hbrp when necessary. */
2323 if (before_4
== 0 && addr
> 0)
2326 insert_lnop_after
|= 1;
2328 else if (before_4
&& addr
<= 4 * 4)
2329 insert_lnop_after
|= 1;
2330 if (before_16
== 0 && addr
> 10 * 4)
2333 insert_lnop_after
|= 2;
2335 else if (before_16
&& addr
<= 14 * 4)
2336 insert_lnop_after
|= 2;
2339 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2341 if (addr
< hbrp_addr0
)
2343 else if (addr
< hbrp_addr1
)
2347 if (CALL_P (insn
) || JUMP_P (insn
))
2349 if (HINTED_P (insn
))
2352 /* Any branch after the first 15 insns should be on an even
2353 address to avoid a special case branch. There might be
2354 some nops and/or hbrps inserted, so we test after 10
2357 SCHED_ON_EVEN_P (insn
) = 1;
2360 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2364 if (addr
+ length
>= 32 * 4)
2366 gcc_assert (before_4
&& before_16
);
2367 if (hbrp_addr0
> 4 * 4)
2370 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2371 recog_memoized (insn
);
2372 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2373 INSN_ADDRESSES_NEW (insn
,
2374 INSN_ADDRESSES (INSN_UID (before_4
)));
2375 PUT_MODE (insn
, GET_MODE (before_4
));
2376 PUT_MODE (before_4
, TImode
);
2377 if (insert_lnop_after
& 1)
2379 insn
= emit_insn_before (gen_lnop (), before_4
);
2380 recog_memoized (insn
);
2381 INSN_LOCATION (insn
) = INSN_LOCATION (before_4
);
2382 INSN_ADDRESSES_NEW (insn
,
2383 INSN_ADDRESSES (INSN_UID (before_4
)));
2384 PUT_MODE (insn
, TImode
);
2387 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2388 && hbrp_addr1
> 16 * 4)
2391 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2392 recog_memoized (insn
);
2393 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2394 INSN_ADDRESSES_NEW (insn
,
2395 INSN_ADDRESSES (INSN_UID (before_16
)));
2396 PUT_MODE (insn
, GET_MODE (before_16
));
2397 PUT_MODE (before_16
, TImode
);
2398 if (insert_lnop_after
& 2)
2400 insn
= emit_insn_before (gen_lnop (), before_16
);
2401 recog_memoized (insn
);
2402 INSN_LOCATION (insn
) = INSN_LOCATION (before_16
);
2403 INSN_ADDRESSES_NEW (insn
,
2404 INSN_ADDRESSES (INSN_UID
2406 PUT_MODE (insn
, TImode
);
2412 else if (BARRIER_P (insn
))
2417 /* The SPU might hang when it executes 48 inline instructions after a
2418 hinted branch jumps to its hinted target. The beginning of a
2419 function and the return from a call might have been hinted, and
2420 must be handled as well. To prevent a hang we insert 2 hbrps. The
2421 first should be within 6 insns of the branch target. The second
2422 should be within 22 insns of the branch target. When determining
2423 if hbrps are necessary, we look for only 32 inline instructions,
2424 because up to 12 nops and 4 hbrps could be inserted. Similarily,
2425 when inserting new hbrps, we insert them within 4 and 16 insns of
2431 if (TARGET_SAFE_HINTS
)
2433 shorten_branches (get_insns ());
2434 /* Insert hbrp at beginning of function */
2435 insn
= next_active_insn (get_insns ());
2437 insert_hbrp_for_ilb_runout (insn
);
2438 /* Insert hbrp after hinted targets. */
2439 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2440 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2441 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2445 static int in_spu_reorg
;
2448 spu_var_tracking (void)
2450 if (flag_var_tracking
)
2453 timevar_push (TV_VAR_TRACKING
);
2454 variable_tracking_main ();
2455 timevar_pop (TV_VAR_TRACKING
);
2456 df_finish_pass (false);
2460 /* Insert branch hints. There are no branch optimizations after this
2461 pass, so it's safe to set our branch hints now. */
2463 spu_machine_dependent_reorg (void)
2467 rtx_insn
*branch
, *insn
;
2468 rtx branch_target
= 0;
2469 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2473 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2475 /* We still do it for unoptimized code because an external
2476 function might have hinted a call or return. */
2477 compute_bb_for_insn ();
2480 spu_var_tracking ();
2481 free_bb_for_insn ();
2485 blocks
= sbitmap_alloc (last_basic_block_for_fn (cfun
));
2486 bitmap_clear (blocks
);
2489 compute_bb_for_insn ();
2491 /* (Re-)discover loops so that bb->loop_father can be used
2492 in the analysis below. */
2493 loop_optimizer_init (AVOID_CFG_MODIFICATIONS
);
2498 (struct spu_bb_info
*) xcalloc (n_basic_blocks_for_fn (cfun
),
2499 sizeof (struct spu_bb_info
));
2501 /* We need exact insn addresses and lengths. */
2502 shorten_branches (get_insns ());
2504 for (i
= n_basic_blocks_for_fn (cfun
) - 1; i
>= 0; i
--)
2506 bb
= BASIC_BLOCK_FOR_FN (cfun
, i
);
2508 if (spu_bb_info
[i
].prop_jump
)
2510 branch
= spu_bb_info
[i
].prop_jump
;
2511 branch_target
= get_branch_target (branch
);
2512 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2513 required_dist
= spu_hint_dist
;
2515 /* Search from end of a block to beginning. In this loop, find
2516 jumps which need a branch and emit them only when:
2517 - it's an indirect branch and we're at the insn which sets
2519 - we're at an insn that will invalidate the hint. e.g., a
2520 call, another hint insn, inline asm that clobbers $hbr, and
2521 some inlined operations (divmodsi4). Don't consider jumps
2522 because they are only at the end of a block and are
2523 considered when we are deciding whether to propagate
2524 - we're getting too far away from the branch. The hbr insns
2525 only have a signed 10 bit offset
2526 We go back as far as possible so the branch will be considered
2527 for propagation when we get to the beginning of the block. */
2528 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2532 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2534 && ((GET_CODE (branch_target
) == REG
2535 && set_of (branch_target
, insn
) != NULL_RTX
)
2536 || insn_clobbers_hbr (insn
)
2537 || branch_addr
- insn_addr
> 600))
2539 rtx_insn
*next
= NEXT_INSN (insn
);
2540 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2541 if (insn
!= BB_END (bb
)
2542 && branch_addr
- next_addr
>= required_dist
)
2546 "hint for %i in block %i before %i\n",
2547 INSN_UID (branch
), bb
->index
,
2549 spu_emit_branch_hint (next
, branch
, branch_target
,
2550 branch_addr
- next_addr
, blocks
);
2555 /* JUMP_P will only be true at the end of a block. When
2556 branch is already set it means we've previously decided
2557 to propagate a hint for that branch into this block. */
2558 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2561 if ((branch_target
= get_branch_target (insn
)))
2564 branch_addr
= insn_addr
;
2565 required_dist
= spu_hint_dist
;
2569 if (insn
== BB_HEAD (bb
))
2575 /* If we haven't emitted a hint for this branch yet, it might
2576 be profitable to emit it in one of the predecessor blocks,
2577 especially for loops. */
2579 basic_block prev
= 0, prop
= 0, prev2
= 0;
2580 int loop_exit
= 0, simple_loop
= 0;
2581 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2583 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2584 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2585 prev
= EDGE_PRED (bb
, j
)->src
;
2587 prev2
= EDGE_PRED (bb
, j
)->src
;
2589 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2590 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2592 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2595 /* If this branch is a loop exit then propagate to previous
2596 fallthru block. This catches the cases when it is a simple
2597 loop or when there is an initial branch into the loop. */
2598 if (prev
&& (loop_exit
|| simple_loop
)
2599 && bb_loop_depth (prev
) <= bb_loop_depth (bb
))
2602 /* If there is only one adjacent predecessor. Don't propagate
2603 outside this loop. */
2604 else if (prev
&& single_pred_p (bb
)
2605 && prev
->loop_father
== bb
->loop_father
)
2608 /* If this is the JOIN block of a simple IF-THEN then
2609 propagate the hint to the HEADER block. */
2610 else if (prev
&& prev2
2611 && EDGE_COUNT (bb
->preds
) == 2
2612 && EDGE_COUNT (prev
->preds
) == 1
2613 && EDGE_PRED (prev
, 0)->src
== prev2
2614 && prev2
->loop_father
== bb
->loop_father
2615 && GET_CODE (branch_target
) != REG
)
2618 /* Don't propagate when:
2619 - this is a simple loop and the hint would be too far
2620 - this is not a simple loop and there are 16 insns in
2622 - the predecessor block ends in a branch that will be
2624 - the predecessor block ends in an insn that invalidates
2628 && (bbend
= BB_END (prop
))
2629 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2630 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2631 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2634 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2635 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2636 bb
->index
, prop
->index
, bb_loop_depth (bb
),
2637 INSN_UID (branch
), loop_exit
, simple_loop
,
2638 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2640 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2641 spu_bb_info
[prop
->index
].bb_index
= i
;
2643 else if (branch_addr
- next_addr
>= required_dist
)
2646 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2647 INSN_UID (branch
), bb
->index
,
2648 INSN_UID (NEXT_INSN (insn
)));
2649 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2650 branch_addr
- next_addr
, blocks
);
2657 if (!bitmap_empty_p (blocks
))
2658 find_many_sub_basic_blocks (blocks
);
2660 /* We have to schedule to make sure alignment is ok. */
2661 FOR_EACH_BB_FN (bb
, cfun
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2663 /* The hints need to be scheduled, so call it again. */
2665 df_finish_pass (true);
2671 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2672 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2674 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2675 between its branch label and the branch . We don't move the
2676 label because GCC expects it at the beginning of the block. */
2677 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2678 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2679 rtx_insn
*label
= as_a
<rtx_insn
*> (XEXP (label_ref
, 0));
2682 for (branch
= NEXT_INSN (label
);
2683 !JUMP_P (branch
) && !CALL_P (branch
);
2684 branch
= NEXT_INSN (branch
))
2685 if (NONJUMP_INSN_P (branch
))
2686 offset
+= get_attr_length (branch
);
2688 XVECEXP (unspec
, 0, 0) = plus_constant (Pmode
, label_ref
, offset
);
2691 spu_var_tracking ();
2693 loop_optimizer_finalize ();
2695 free_bb_for_insn ();
/* Insn scheduling routines, primarily for dual issue.  */

/* Implement TARGET_SCHED_ISSUE_RATE: the SPU can dual-issue.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}
2709 uses_ls_unit(rtx_insn
*insn
)
2711 rtx set
= single_set (insn
);
2713 && (GET_CODE (SET_DEST (set
)) == MEM
2714 || GET_CODE (SET_SRC (set
)) == MEM
))
2720 get_pipe (rtx_insn
*insn
)
2723 /* Handle inline asm */
2724 if (INSN_CODE (insn
) == -1)
2726 t
= get_attr_type (insn
);
2751 case TYPE_IPREFETCH
:
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates it's clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock;
static int pipe1_clock;

/* Cycle of the most recently issued insn, used to account for the
   alignment nops inserted later by pad_bb.  */
static int prev_clock_var;

/* Priority of the most recently issued insn.  */
static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first;
static int prev_ls_clock;
2788 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2789 int max_ready ATTRIBUTE_UNUSED
)
2791 spu_sched_length
= 0;
2795 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2796 int max_ready ATTRIBUTE_UNUSED
)
2798 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2800 /* When any block might be at least 8-byte aligned, assume they
2801 will all be at least 8-byte aligned to make sure dual issue
2802 works out correctly. */
2803 spu_sched_length
= 0;
2805 spu_ls_first
= INT_MAX
;
2810 prev_clock_var
= -1;
2815 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2816 int verbose ATTRIBUTE_UNUSED
,
2817 rtx_insn
*insn
, int more
)
2821 if (GET_CODE (PATTERN (insn
)) == USE
2822 || GET_CODE (PATTERN (insn
)) == CLOBBER
2823 || (len
= get_attr_length (insn
)) == 0)
2826 spu_sched_length
+= len
;
2828 /* Reset on inline asm */
2829 if (INSN_CODE (insn
) == -1)
2831 spu_ls_first
= INT_MAX
;
2836 p
= get_pipe (insn
);
2838 pipe0_clock
= clock_var
;
2840 pipe1_clock
= clock_var
;
2844 if (clock_var
- prev_ls_clock
> 1
2845 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2846 spu_ls_first
= INT_MAX
;
2847 if (uses_ls_unit (insn
))
2849 if (spu_ls_first
== INT_MAX
)
2850 spu_ls_first
= spu_sched_length
;
2851 prev_ls_clock
= clock_var
;
2854 /* The scheduler hasn't inserted the nop, but we will later on.
2855 Include those nops in spu_sched_length. */
2856 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2857 spu_sched_length
+= 4;
2858 prev_clock_var
= clock_var
;
2860 /* more is -1 when called from spu_sched_reorder for new insns
2861 that don't have INSN_PRIORITY */
2863 prev_priority
= INSN_PRIORITY (insn
);
2866 /* Always try issuing more insns. spu_sched_reorder will decide
2867 when the cycle should be advanced. */
2871 /* This function is called for both TARGET_SCHED_REORDER and
2872 TARGET_SCHED_REORDER2. */
2874 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2875 rtx_insn
**ready
, int *nreadyp
, int clock
)
2877 int i
, nready
= *nreadyp
;
2878 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
2883 if (nready
<= 0 || pipe1_clock
>= clock
)
2886 /* Find any rtl insns that don't generate assembly insns and schedule
2888 for (i
= nready
- 1; i
>= 0; i
--)
2891 if (INSN_CODE (insn
) == -1
2892 || INSN_CODE (insn
) == CODE_FOR_blockage
2893 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
2895 ready
[i
] = ready
[nready
- 1];
2896 ready
[nready
- 1] = insn
;
2901 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
2902 for (i
= 0; i
< nready
; i
++)
2903 if (INSN_CODE (ready
[i
]) != -1)
2906 switch (get_attr_type (insn
))
2931 case TYPE_IPREFETCH
:
2937 /* In the first scheduling phase, schedule loads and stores together
2938 to increase the chance they will get merged during postreload CSE. */
2939 if (!reload_completed
&& pipe_ls
>= 0)
2941 insn
= ready
[pipe_ls
];
2942 ready
[pipe_ls
] = ready
[nready
- 1];
2943 ready
[nready
- 1] = insn
;
2947 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
2951 /* When we have loads/stores in every cycle of the last 15 insns and
2952 we are about to schedule another load/store, emit an hbrp insn
2955 && spu_sched_length
- spu_ls_first
>= 4 * 15
2956 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
2958 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
2959 recog_memoized (insn
);
2960 if (pipe0_clock
< clock
)
2961 PUT_MODE (insn
, TImode
);
2962 spu_sched_variable_issue (file
, verbose
, insn
, -1);
2966 /* In general, we want to emit nops to increase dual issue, but dual
2967 issue isn't faster when one of the insns could be scheduled later
2968 without effecting the critical path. We look at INSN_PRIORITY to
2969 make a good guess, but it isn't perfect so -mdual-nops=n can be
2970 used to effect it. */
2971 if (in_spu_reorg
&& spu_dual_nops
< 10)
2973 /* When we are at an even address and we are not issuing nops to
2974 improve scheduling then we need to advance the cycle. */
2975 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
2976 && (spu_dual_nops
== 0
2979 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
2982 /* When at an odd address, schedule the highest priority insn
2983 without considering pipeline. */
2984 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
2985 && (spu_dual_nops
== 0
2987 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
2992 /* We haven't issued a pipe0 insn yet this cycle, if there is a
2993 pipe0 insn in the ready list, schedule it. */
2994 if (pipe0_clock
< clock
&& pipe_0
>= 0)
2995 schedule_i
= pipe_0
;
2997 /* Either we've scheduled a pipe0 insn already or there is no pipe0
2998 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3000 schedule_i
= pipe_1
;
3002 if (schedule_i
> -1)
3004 insn
= ready
[schedule_i
];
3005 ready
[schedule_i
] = ready
[nready
- 1];
3006 ready
[nready
- 1] = insn
;
3012 /* INSN is dependent on DEP_INSN. */
3014 spu_sched_adjust_cost (rtx_insn
*insn
, rtx link
, rtx_insn
*dep_insn
, int cost
)
3018 /* The blockage pattern is used to prevent instructions from being
3019 moved across it and has no cost. */
3020 if (INSN_CODE (insn
) == CODE_FOR_blockage
3021 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3024 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
3025 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3028 /* Make sure hbrps are spread out. */
3029 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3030 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3033 /* Make sure hints and hbrps are 2 cycles apart. */
3034 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3035 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3036 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3037 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3040 /* An hbrp has no real dependency on other insns. */
3041 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3042 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3045 /* Assuming that it is unlikely an argument register will be used in
3046 the first cycle of the called function, we reduce the cost for
3047 slightly better scheduling of dep_insn. When not hinted, the
3048 mispredicted branch would hide the cost as well. */
3051 rtx target
= get_branch_target (insn
);
3052 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3057 /* And when returning from a function, let's assume the return values
3058 are completed sooner too. */
3059 if (CALL_P (dep_insn
))
3062 /* Make sure an instruction that loads from the back chain is schedule
3063 away from the return instruction so a hint is more likely to get
3065 if (INSN_CODE (insn
) == CODE_FOR__return
3066 && (set
= single_set (dep_insn
))
3067 && GET_CODE (SET_DEST (set
)) == REG
3068 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3071 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3072 scheduler makes every insn in a block anti-dependent on the final
3073 jump_insn. We adjust here so higher cost insns will get scheduled
3075 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3076 return insn_cost (dep_insn
) - 3;
3081 /* Create a CONST_DOUBLE from a string. */
3083 spu_float_const (const char *string
, machine_mode mode
)
3085 REAL_VALUE_TYPE value
;
3086 value
= REAL_VALUE_ATOF (string
, mode
);
3087 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
3091 spu_constant_address_p (rtx x
)
3093 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
3094 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
3095 || GET_CODE (x
) == HIGH
);
3098 static enum spu_immediate
3099 which_immediate_load (HOST_WIDE_INT val
)
3101 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3103 if (val
>= -0x8000 && val
<= 0x7fff)
3105 if (val
>= 0 && val
<= 0x3ffff)
3107 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3109 if ((val
& 0xffff) == 0)
3115 /* Return true when OP can be loaded by one of the il instructions, or
3116 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3118 immediate_load_p (rtx op
, machine_mode mode
)
3120 if (CONSTANT_P (op
))
3122 enum immediate_class c
= classify_immediate (op
, mode
);
3123 return c
== IC_IL1
|| c
== IC_IL1s
3124 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.

   The "identity" byte at position I is I+16; a run of bytes deviating
   from the identity pattern encodes the insertion position and width.
   Bug fix: the run-scanning loop used to read arr[i+run] before
   checking i+run < 16, which could read one byte past the end of a
   16-byte array; the bounds check now comes first.  */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
	if (!run)
	  {
	    start = i;
	    if (arr[i] == 3)
	      run = 1;
	    else if (arr[i] == 2 && arr[i + 1] == 3)
	      run = 2;
	    else if (arr[i] == 0)
	      {
		/* Bounds check before the access (see above).  */
		while (i + run < 16 && arr[i + run] == run)
		  run++;
		if (run != 4 && run != 8)
		  cpat = 0;
	      }
	    else
	      cpat = 0;
	    /* The run must be naturally aligned for its width.  */
	    if ((i & (run - 1)) != 0)
	      cpat = 0;
	    i += run;
	  }
	else
	  cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
	run = 1;
      if (prun)
	*prun = run;
      if (pstart)
	*pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
3178 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3179 it into a register. MODE is only valid when OP is a CONST_INT. */
3180 static enum immediate_class
3181 classify_immediate (rtx op
, machine_mode mode
)
3184 unsigned char arr
[16];
3185 int i
, j
, repeated
, fsmbi
, repeat
;
3187 gcc_assert (CONSTANT_P (op
));
3189 if (GET_MODE (op
) != VOIDmode
)
3190 mode
= GET_MODE (op
);
3192 /* A V4SI const_vector with all identical symbols is ok. */
3195 && GET_CODE (op
) == CONST_VECTOR
3196 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3197 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3198 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3199 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3200 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3201 op
= CONST_VECTOR_ELT (op
, 0);
3203 switch (GET_CODE (op
))
3207 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3210 /* We can never know if the resulting address fits in 18 bits and can be
3211 loaded with ila. For now, assume the address will not overflow if
3212 the displacement is "small" (fits 'K' constraint). */
3213 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3215 rtx sym
= XEXP (XEXP (op
, 0), 0);
3216 rtx cst
= XEXP (XEXP (op
, 0), 1);
3218 if (GET_CODE (sym
) == SYMBOL_REF
3219 && GET_CODE (cst
) == CONST_INT
3220 && satisfies_constraint_K (cst
))
3229 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3230 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3231 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3237 constant_to_array (mode
, op
, arr
);
3239 /* Check that each 4-byte slot is identical. */
3241 for (i
= 4; i
< 16; i
+= 4)
3242 for (j
= 0; j
< 4; j
++)
3243 if (arr
[j
] != arr
[i
+ j
])
3248 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3249 val
= trunc_int_for_mode (val
, SImode
);
3251 if (which_immediate_load (val
) != SPU_NONE
)
3255 /* Any mode of 2 bytes or smaller can be loaded with an il
3257 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3261 for (i
= 0; i
< 16 && fsmbi
; i
++)
3262 if (arr
[i
] != 0 && repeat
== 0)
3264 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3267 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3269 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
3282 static enum spu_immediate
3283 which_logical_immediate (HOST_WIDE_INT val
)
3285 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3287 if (val
>= -0x200 && val
<= 0x1ff)
3289 if (val
>= 0 && val
<= 0xffff)
3291 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3293 val
= trunc_int_for_mode (val
, HImode
);
3294 if (val
>= -0x200 && val
<= 0x1ff)
3296 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3298 val
= trunc_int_for_mode (val
, QImode
);
3299 if (val
>= -0x200 && val
<= 0x1ff)
3306 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3309 const_vector_immediate_p (rtx x
)
3312 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
3313 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
3314 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
3315 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
3321 logical_immediate_p (rtx op
, machine_mode mode
)
3324 unsigned char arr
[16];
3327 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3328 || GET_CODE (op
) == CONST_VECTOR
);
3330 if (GET_CODE (op
) == CONST_VECTOR
3331 && !const_vector_immediate_p (op
))
3334 if (GET_MODE (op
) != VOIDmode
)
3335 mode
= GET_MODE (op
);
3337 constant_to_array (mode
, op
, arr
);
3339 /* Check that bytes are repeated. */
3340 for (i
= 4; i
< 16; i
+= 4)
3341 for (j
= 0; j
< 4; j
++)
3342 if (arr
[j
] != arr
[i
+ j
])
3345 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3346 val
= trunc_int_for_mode (val
, SImode
);
3348 i
= which_logical_immediate (val
);
3349 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
3353 iohl_immediate_p (rtx op
, machine_mode mode
)
3356 unsigned char arr
[16];
3359 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3360 || GET_CODE (op
) == CONST_VECTOR
);
3362 if (GET_CODE (op
) == CONST_VECTOR
3363 && !const_vector_immediate_p (op
))
3366 if (GET_MODE (op
) != VOIDmode
)
3367 mode
= GET_MODE (op
);
3369 constant_to_array (mode
, op
, arr
);
3371 /* Check that bytes are repeated. */
3372 for (i
= 4; i
< 16; i
+= 4)
3373 for (j
= 0; j
< 4; j
++)
3374 if (arr
[j
] != arr
[i
+ j
])
3377 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3378 val
= trunc_int_for_mode (val
, SImode
);
3380 return val
>= 0 && val
<= 0xffff;
3384 arith_immediate_p (rtx op
, machine_mode mode
,
3385 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3388 unsigned char arr
[16];
3391 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3392 || GET_CODE (op
) == CONST_VECTOR
);
3394 if (GET_CODE (op
) == CONST_VECTOR
3395 && !const_vector_immediate_p (op
))
3398 if (GET_MODE (op
) != VOIDmode
)
3399 mode
= GET_MODE (op
);
3401 constant_to_array (mode
, op
, arr
);
3403 if (VECTOR_MODE_P (mode
))
3404 mode
= GET_MODE_INNER (mode
);
3406 bytes
= GET_MODE_SIZE (mode
);
3407 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3409 /* Check that bytes are repeated. */
3410 for (i
= bytes
; i
< 16; i
+= bytes
)
3411 for (j
= 0; j
< bytes
; j
++)
3412 if (arr
[j
] != arr
[i
+ j
])
3416 for (j
= 1; j
< bytes
; j
++)
3417 val
= (val
<< 8) | arr
[j
];
3419 val
= trunc_int_for_mode (val
, mode
);
3421 return val
>= low
&& val
<= high
;
3424 /* TRUE when op is an immediate and an exact power of 2, and given that
3425 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3426 all entries must be the same. */
3428 exp2_immediate_p (rtx op
, machine_mode mode
, int low
, int high
)
3430 machine_mode int_mode
;
3432 unsigned char arr
[16];
3435 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3436 || GET_CODE (op
) == CONST_VECTOR
);
3438 if (GET_CODE (op
) == CONST_VECTOR
3439 && !const_vector_immediate_p (op
))
3442 if (GET_MODE (op
) != VOIDmode
)
3443 mode
= GET_MODE (op
);
3445 constant_to_array (mode
, op
, arr
);
3447 if (VECTOR_MODE_P (mode
))
3448 mode
= GET_MODE_INNER (mode
);
3450 bytes
= GET_MODE_SIZE (mode
);
3451 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3453 /* Check that bytes are repeated. */
3454 for (i
= bytes
; i
< 16; i
+= bytes
)
3455 for (j
= 0; j
< bytes
; j
++)
3456 if (arr
[j
] != arr
[i
+ j
])
3460 for (j
= 1; j
< bytes
; j
++)
3461 val
= (val
<< 8) | arr
[j
];
3463 val
= trunc_int_for_mode (val
, int_mode
);
3465 /* Currently, we only handle SFmode */
3466 gcc_assert (mode
== SFmode
);
3469 int exp
= (val
>> 23) - 127;
3470 return val
> 0 && (val
& 0x007fffff) == 0
3471 && exp
>= low
&& exp
<= high
;
3476 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3479 ea_symbol_ref_p (const_rtx x
)
3483 if (GET_CODE (x
) == CONST
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
3485 rtx plus
= XEXP (x
, 0);
3486 rtx op0
= XEXP (plus
, 0);
3487 rtx op1
= XEXP (plus
, 1);
3488 if (GET_CODE (op1
) == CONST_INT
)
3492 return (GET_CODE (x
) == SYMBOL_REF
3493 && (decl
= SYMBOL_REF_DECL (x
)) != 0
3494 && TREE_CODE (decl
) == VAR_DECL
3495 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)));
3499 - any 32-bit constant (SImode, SFmode)
3500 - any constant that can be generated with fsmbi (any mode)
3501 - a 64-bit constant where the high and low bits are identical
3503 - a 128-bit constant where the four 32-bit words match. */
3505 spu_legitimate_constant_p (machine_mode mode
, rtx x
)
3507 subrtx_iterator::array_type array
;
3508 if (GET_CODE (x
) == HIGH
)
3511 /* Reject any __ea qualified reference. These can't appear in
3512 instructions but must be forced to the constant pool. */
3513 FOR_EACH_SUBRTX (iter
, array
, x
, ALL
)
3514 if (ea_symbol_ref_p (*iter
))
3517 /* V4SI with all identical symbols is valid. */
3520 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3521 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3522 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3523 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3524 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3525 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3527 if (GET_CODE (x
) == CONST_VECTOR
3528 && !const_vector_immediate_p (x
))
3533 /* Valid address are:
3534 - symbol_ref, label_ref, const
3536 - reg + const_int, where const_int is 16 byte aligned
3537 - reg + reg, alignment doesn't matter
3538 The alignment matters in the reg+const case because lqd and stqd
3539 ignore the 4 least significant bits of the const. We only care about
3540 16 byte modes because the expand phase will change all smaller MEM
3541 references to TImode. */
3543 spu_legitimate_address_p (machine_mode mode
,
3544 rtx x
, bool reg_ok_strict
)
3546 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3548 && GET_CODE (x
) == AND
3549 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3550 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3552 switch (GET_CODE (x
))
3555 return !TARGET_LARGE_MEM
;
3559 /* Keep __ea references until reload so that spu_expand_mov can see them
3561 if (ea_symbol_ref_p (x
))
3562 return !reload_in_progress
&& !reload_completed
;
3563 return !TARGET_LARGE_MEM
;
3566 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3574 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3579 rtx op0
= XEXP (x
, 0);
3580 rtx op1
= XEXP (x
, 1);
3581 if (GET_CODE (op0
) == SUBREG
)
3582 op0
= XEXP (op0
, 0);
3583 if (GET_CODE (op1
) == SUBREG
)
3584 op1
= XEXP (op1
, 0);
3585 if (GET_CODE (op0
) == REG
3586 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3587 && GET_CODE (op1
) == CONST_INT
3588 && ((INTVAL (op1
) >= -0x2000 && INTVAL (op1
) <= 0x1fff)
3589 /* If virtual registers are involved, the displacement will
3590 change later on anyway, so checking would be premature.
3591 Reload will make sure the final displacement after
3592 register elimination is OK. */
3593 || op0
== arg_pointer_rtx
3594 || op0
== frame_pointer_rtx
3595 || op0
== virtual_stack_vars_rtx
)
3596 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3598 if (GET_CODE (op0
) == REG
3599 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3600 && GET_CODE (op1
) == REG
3601 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
3612 /* Like spu_legitimate_address_p, except with named addresses. */
3614 spu_addr_space_legitimate_address_p (machine_mode mode
, rtx x
,
3615 bool reg_ok_strict
, addr_space_t as
)
3617 if (as
== ADDR_SPACE_EA
)
3618 return (REG_P (x
) && (GET_MODE (x
) == EAmode
));
3620 else if (as
!= ADDR_SPACE_GENERIC
)
3623 return spu_legitimate_address_p (mode
, x
, reg_ok_strict
);
3626 /* When the address is reg + const_int, force the const_int into a
3629 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3630 machine_mode mode ATTRIBUTE_UNUSED
)
3633 /* Make sure both operands are registers. */
3634 if (GET_CODE (x
) == PLUS
)
3638 if (ALIGNED_SYMBOL_REF_P (op0
))
3640 op0
= force_reg (Pmode
, op0
);
3641 mark_reg_pointer (op0
, 128);
3643 else if (GET_CODE (op0
) != REG
)
3644 op0
= force_reg (Pmode
, op0
);
3645 if (ALIGNED_SYMBOL_REF_P (op1
))
3647 op1
= force_reg (Pmode
, op1
);
3648 mark_reg_pointer (op1
, 128);
3650 else if (GET_CODE (op1
) != REG
)
3651 op1
= force_reg (Pmode
, op1
);
3652 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3657 /* Like spu_legitimate_address, except with named address support. */
3659 spu_addr_space_legitimize_address (rtx x
, rtx oldx
, machine_mode mode
,
3662 if (as
!= ADDR_SPACE_GENERIC
)
3665 return spu_legitimize_address (x
, oldx
, mode
);
3668 /* Reload reg + const_int for out-of-range displacements. */
3670 spu_legitimize_reload_address (rtx ad
, machine_mode mode ATTRIBUTE_UNUSED
,
3671 int opnum
, int type
)
3673 bool removed_and
= false;
3675 if (GET_CODE (ad
) == AND
3676 && CONST_INT_P (XEXP (ad
, 1))
3677 && INTVAL (XEXP (ad
, 1)) == (HOST_WIDE_INT
) - 16)
3683 if (GET_CODE (ad
) == PLUS
3684 && REG_P (XEXP (ad
, 0))
3685 && CONST_INT_P (XEXP (ad
, 1))
3686 && !(INTVAL (XEXP (ad
, 1)) >= -0x2000
3687 && INTVAL (XEXP (ad
, 1)) <= 0x1fff))
3689 /* Unshare the sum. */
3692 /* Reload the displacement. */
3693 push_reload (XEXP (ad
, 1), NULL_RTX
, &XEXP (ad
, 1), NULL
,
3694 BASE_REG_CLASS
, GET_MODE (ad
), VOIDmode
, 0, 0,
3695 opnum
, (enum reload_type
) type
);
3697 /* Add back AND for alignment if we stripped it. */
3699 ad
= gen_rtx_AND (GET_MODE (ad
), ad
, GEN_INT (-16));
3707 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3708 struct attribute_spec.handler. */
3710 spu_handle_fndecl_attribute (tree
* node
,
3712 tree args ATTRIBUTE_UNUSED
,
3713 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3715 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3717 warning (0, "%qE attribute only applies to functions",
3719 *no_add_attrs
= true;
3725 /* Handle the "vector" attribute. */
3727 spu_handle_vector_attribute (tree
* node
, tree name
,
3728 tree args ATTRIBUTE_UNUSED
,
3729 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3731 tree type
= *node
, result
= NULL_TREE
;
3735 while (POINTER_TYPE_P (type
)
3736 || TREE_CODE (type
) == FUNCTION_TYPE
3737 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3738 type
= TREE_TYPE (type
);
3740 mode
= TYPE_MODE (type
);
3742 unsigned_p
= TYPE_UNSIGNED (type
);
3746 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3749 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3752 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3755 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3758 result
= V4SF_type_node
;
3761 result
= V2DF_type_node
;
3767 /* Propagate qualifiers attached to the element type
3768 onto the vector type. */
3769 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3770 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3772 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3775 warning (0, "%qE attribute ignored", name
);
3777 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
3782 /* Return nonzero if FUNC is a naked function. */
3784 spu_naked_function_p (tree func
)
3788 if (TREE_CODE (func
) != FUNCTION_DECL
)
3791 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3792 return a
!= NULL_TREE
;
3796 spu_initial_elimination_offset (int from
, int to
)
3798 int saved_regs_size
= spu_saved_regs_size ();
3800 if (!crtl
->is_leaf
|| crtl
->outgoing_args_size
3801 || get_frame_size () || saved_regs_size
)
3802 sp_offset
= STACK_POINTER_OFFSET
;
3803 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3804 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
3805 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3806 return get_frame_size ();
3807 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3808 return sp_offset
+ crtl
->outgoing_args_size
3809 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3810 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3811 return get_frame_size () + saved_regs_size
+ sp_offset
;
3817 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3819 machine_mode mode
= TYPE_MODE (type
);
3820 int byte_size
= ((mode
== BLKmode
)
3821 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3823 /* Make sure small structs are left justified in a register. */
3824 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3825 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3830 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3831 int n
= byte_size
/ UNITS_PER_WORD
;
3832 v
= rtvec_alloc (nregs
);
3833 for (i
= 0; i
< n
; i
++)
3835 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3836 gen_rtx_REG (TImode
,
3839 GEN_INT (UNITS_PER_WORD
* i
));
3840 byte_size
-= UNITS_PER_WORD
;
3848 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3850 gen_rtx_EXPR_LIST (VOIDmode
,
3851 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3852 GEN_INT (UNITS_PER_WORD
* n
));
3854 return gen_rtx_PARALLEL (mode
, v
);
3856 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3860 spu_function_arg (cumulative_args_t cum_v
,
3862 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3864 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3867 if (*cum
>= MAX_REGISTER_ARGS
)
3870 byte_size
= ((mode
== BLKmode
)
3871 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3873 /* The ABI does not allow parameters to be passed partially in
3874 reg and partially in stack. */
3875 if ((*cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3878 /* Make sure small structs are left justified in a register. */
3879 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3880 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3886 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3887 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3888 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ *cum
),
3890 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3893 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ *cum
);
3897 spu_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
3898 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3900 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
3902 *cum
+= (type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
3905 ? ((int_size_in_bytes (type
) + 15) / 16)
3908 : HARD_REGNO_NREGS (cum
, mode
));
3911 /* Variable sized types are passed by reference. */
3913 spu_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED
,
3914 machine_mode mode ATTRIBUTE_UNUSED
,
3915 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3917 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3923 /* Create and return the va_list datatype.
3925 On SPU, va_list is an array type equivalent to
3927 typedef struct __va_list_tag
3929 void *__args __attribute__((__aligned(16)));
3930 void *__skip __attribute__((__aligned(16)));
3934 where __args points to the arg that will be returned by the next
3935 va_arg(), and __skip points to the previous stack frame such that
3936 when __args == __skip we should advance __args by 32 bytes. */
3938 spu_build_builtin_va_list (void)
3940 tree f_args
, f_skip
, record
, type_decl
;
3943 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3946 build_decl (BUILTINS_LOCATION
,
3947 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3949 f_args
= build_decl (BUILTINS_LOCATION
,
3950 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3951 f_skip
= build_decl (BUILTINS_LOCATION
,
3952 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3954 DECL_FIELD_CONTEXT (f_args
) = record
;
3955 DECL_ALIGN (f_args
) = 128;
3956 DECL_USER_ALIGN (f_args
) = 1;
3958 DECL_FIELD_CONTEXT (f_skip
) = record
;
3959 DECL_ALIGN (f_skip
) = 128;
3960 DECL_USER_ALIGN (f_skip
) = 1;
3962 TYPE_STUB_DECL (record
) = type_decl
;
3963 TYPE_NAME (record
) = type_decl
;
3964 TYPE_FIELDS (record
) = f_args
;
3965 DECL_CHAIN (f_args
) = f_skip
;
3967 /* We know this is being padded and we want it too. It is an internal
3968 type so hide the warnings from the user. */
3970 warn_padded
= false;
3972 layout_type (record
);
3976 /* The correct type is an array type of one element. */
3977 return build_array_type (record
, build_index_type (size_zero_node
));
3980 /* Implement va_start by filling the va_list structure VALIST.
3981 NEXTARG points to the first anonymous stack argument.
3983 The following global variables are used to initialize
3984 the va_list structure:
3987 the CUMULATIVE_ARGS for this function
3989 crtl->args.arg_offset_rtx:
3990 holds the offset of the first anonymous stack argument
3991 (relative to the virtual arg pointer). */
3994 spu_va_start (tree valist
, rtx nextarg
)
3996 tree f_args
, f_skip
;
3999 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4000 f_skip
= DECL_CHAIN (f_args
);
4002 valist
= build_simple_mem_ref (valist
);
4004 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4006 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4008 /* Find the __args area. */
4009 t
= make_tree (TREE_TYPE (args
), nextarg
);
4010 if (crtl
->args
.pretend_args_size
> 0)
4011 t
= fold_build_pointer_plus_hwi (t
, -STACK_POINTER_OFFSET
);
4012 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
4013 TREE_SIDE_EFFECTS (t
) = 1;
4014 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4016 /* Find the __skip area. */
4017 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4018 t
= fold_build_pointer_plus_hwi (t
, (crtl
->args
.pretend_args_size
4019 - STACK_POINTER_OFFSET
));
4020 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
4021 TREE_SIDE_EFFECTS (t
) = 1;
4022 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4025 /* Gimplify va_arg by updating the va_list structure
4026 VALIST as required to retrieve an argument of type
4027 TYPE, and returning that argument.
4029 ret = va_arg(VALIST, TYPE);
4031 generates code equivalent to:
4033 paddedsize = (sizeof(TYPE) + 15) & -16;
4034 if (VALIST.__args + paddedsize > VALIST.__skip
4035 && VALIST.__args <= VALIST.__skip)
4036 addr = VALIST.__skip + 32;
4038 addr = VALIST.__args;
4039 VALIST.__args = addr + paddedsize;
4040 ret = *(TYPE *)addr;
4043 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4044 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4046 tree f_args
, f_skip
;
4048 HOST_WIDE_INT size
, rsize
;
4050 bool pass_by_reference_p
;
4052 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4053 f_skip
= DECL_CHAIN (f_args
);
4056 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4058 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4060 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4062 /* if an object is dynamically sized, a pointer to it is passed
4063 instead of the object itself. */
4064 pass_by_reference_p
= pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4066 if (pass_by_reference_p
)
4067 type
= build_pointer_type (type
);
4068 size
= int_size_in_bytes (type
);
4069 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4071 /* build conditional expression to calculate addr. The expression
4072 will be gimplified later. */
4073 tmp
= fold_build_pointer_plus_hwi (unshare_expr (args
), rsize
);
4074 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4075 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4076 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4077 unshare_expr (skip
)));
4079 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4080 fold_build_pointer_plus_hwi (unshare_expr (skip
), 32),
4081 unshare_expr (args
));
4083 gimplify_assign (addr
, tmp
, pre_p
);
4085 /* update VALIST.__args */
4086 tmp
= fold_build_pointer_plus_hwi (addr
, rsize
);
4087 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4089 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
4092 if (pass_by_reference_p
)
4093 addr
= build_va_arg_indirect_ref (addr
);
4095 return build_va_arg_indirect_ref (addr
);
4098 /* Save parameter registers starting with the register that corresponds
4099 to the first unnamed parameters. If the first unnamed parameter is
4100 in the stack then save no registers. Set pretend_args_size to the
4101 amount of space needed to save the registers. */
4103 spu_setup_incoming_varargs (cumulative_args_t cum
, machine_mode mode
,
4104 tree type
, int *pretend_size
, int no_rtl
)
4111 int ncum
= *get_cumulative_args (cum
);
4113 /* cum currently points to the last named argument, we want to
4114 start at the next argument. */
4115 spu_function_arg_advance (pack_cumulative_args (&ncum
), mode
, type
, true);
4117 offset
= -STACK_POINTER_OFFSET
;
4118 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4120 tmp
= gen_frame_mem (V4SImode
,
4121 plus_constant (Pmode
, virtual_incoming_args_rtx
,
4123 emit_move_insn (tmp
,
4124 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4127 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
4132 spu_conditional_register_usage (void)
4136 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4137 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4141 /* This is called any time we inspect the alignment of a register for
/* Return nonzero when register X is known to be at least 128-bit (16-byte)
   aligned, consulting ORIGINAL_REGNO for hard registers so reload copies
   keep their pointer alignment info.
   NOTE(review): garbled extraction -- the declaration of `regno' and the
   surrounding braces are missing; code left byte-identical, comments only.  */
4144 reg_aligned_for_addr (rtx x
)
4147 REGNO (x
) < FIRST_PSEUDO_REGISTER
? ORIGINAL_REGNO (x
) : REGNO (x
);
4148 return REGNO_POINTER_ALIGN (regno
) >= 128;
4151 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4152 into its SYMBOL_REF_FLAGS. */
/* Implements TARGET_ENCODE_SECTION_INFO: run the default encoding, then tag
   under-aligned variables with SYMBOL_FLAG_ALIGN1 so address code knows the
   symbol is not 16-byte aligned.
   NOTE(review): garbled extraction; code left byte-identical, comments only.  */
4154 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
4156 default_encode_section_info (decl
, rtl
, first
);
4158 /* If a variable has a forced alignment to < 16 bytes, mark it with
4159 SYMBOL_FLAG_ALIGN1. */
4160 if (TREE_CODE (decl
) == VAR_DECL
4161 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4162 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4165 /* Return TRUE if we are certain the mem refers to a complete object
4166 which is both 16-byte aligned and padded to a 16-byte boundary. This
4167 would make it safe to store with a single instruction.
4168 We guarantee the alignment and padding for static objects by aligning
4169 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4170 FIXME: We currently cannot guarantee this for objects on the stack
4171 because assign_parm_setup_stack calls assign_stack_local with the
4172 alignment of the parameter mode and in that case the alignment never
4173 gets adjusted by LOCAL_ALIGNMENT. */
/* Return TRUE when MEM is known to refer to a complete, 16-byte-aligned,
   16-byte-padded static object, so a single store instruction is safe
   (see the block comment above this function in the original source).
   NOTE(review): garbled extraction -- the early-return bodies and the
   `decl &&' halves of the conditions are missing; code left byte-identical,
   comments only.  */
4175 store_with_one_insn_p (rtx mem
)
4177 machine_mode mode
= GET_MODE (mem
);
4178 rtx addr
= XEXP (mem
, 0);
/* BLKmode accesses never qualify.  */
4179 if (mode
== BLKmode
)
/* A full quadword (or larger) is trivially a one-insn store.  */
4181 if (GET_MODE_SIZE (mode
) >= 16)
4183 /* Only static objects. */
4184 if (GET_CODE (addr
) == SYMBOL_REF
)
4186 /* We use the associated declaration to make sure the access is
4187 referring to the whole object.
4188 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4189 if it is necessary. Will there be cases where one exists, and
4190 the other does not? Will there be cases where both exist, but
4191 have different types? */
4192 tree decl
= MEM_EXPR (mem
);
4194 && TREE_CODE (decl
) == VAR_DECL
4195 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4197 decl
= SYMBOL_REF_DECL (addr
);
4199 && TREE_CODE (decl
) == VAR_DECL
4200 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4206 /* Return 1 when the address is not valid for a simple load and store as
4207 required by the '_mov*' patterns. We could make this less strict
4208 for loads, but we prefer mem's to look the same so they are more
4209 likely to be merged. */
/* Return 1 when MEM cannot be handled by a simple '_mov*' load/store and
   must be split: sub-quadword accesses that are neither provably one-insn
   stores nor padded component refs (see comment above in the original).
   NOTE(review): garbled extraction -- the return statements are missing;
   code left byte-identical, comments only.  */
4211 address_needs_split (rtx mem
)
4213 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4214 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4215 || !(store_with_one_insn_p (mem
)
4216 || mem_is_padded_component_ref (mem
))))
/* Lazily-initialized libfunc symbols and the alias set used for software
   __ea cache accesses (initialized on first use below).  */
4222 static GTY(()) rtx cache_fetch
; /* __cache_fetch function */
4223 static GTY(()) rtx cache_fetch_dirty
; /* __cache_fetch_dirty function */
4224 static alias_set_type ea_alias_set
= -1; /* alias set for __ea memory */
4226 /* MEM is known to be an __ea qualified memory access. Emit a call to
4227 fetch the ppu memory to local store, and return its address in local
/* Out-of-line __ea access: call __cache_fetch_dirty (for stores) or
   __cache_fetch (for loads) to bring the PPU memory at EA_ADDR into local
   store, leaving the local-store address in DATA_ADDR.
   NOTE(review): garbled extraction -- the is_store branch structure and
   the !cache_fetch guard are missing; code left byte-identical,
   comments only.  */
4231 ea_load_store (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
/* Number of bytes to mark dirty = size of the access.  */
4235 rtx ndirty
= GEN_INT (GET_MODE_SIZE (GET_MODE (mem
)));
4236 if (!cache_fetch_dirty
)
4237 cache_fetch_dirty
= init_one_libfunc ("__cache_fetch_dirty");
4238 emit_library_call_value (cache_fetch_dirty
, data_addr
, LCT_NORMAL
, Pmode
,
4239 2, ea_addr
, EAmode
, ndirty
, SImode
);
4244 cache_fetch
= init_one_libfunc ("__cache_fetch");
4245 emit_library_call_value (cache_fetch
, data_addr
, LCT_NORMAL
, Pmode
,
4246 1, ea_addr
, EAmode
);
4250 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4251 dirty bit marking, inline.
4253 The cache control data structure is an array of
4255 struct __cache_tag_array
4257 unsigned int tag_lo[4];
4258 unsigned int tag_hi[4];
4259 void *data_pointer[4];
4261 vector unsigned short dirty_bits[4];
/* Inline version of ea_load_store: open-code the 4-way cache tag lookup
   described in the struct __cache_tag_array comment above, falling back to
   the libfunc on a miss, and (for stores) or-ing in the dirty bits inline.
   NOTE(review): garbled extraction -- many interior lines (declarations of
   v/insn/ea_addr_si, brace structure, several operand lines) are missing;
   code left byte-identical, comments only.  */
4265 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4269 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4270 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4271 rtx index_mask
= gen_reg_rtx (SImode
);
4272 rtx tag_arr
= gen_reg_rtx (Pmode
);
4273 rtx splat_mask
= gen_reg_rtx (TImode
);
4274 rtx splat
= gen_reg_rtx (V4SImode
);
4275 rtx splat_hi
= NULL_RTX
;
4276 rtx tag_index
= gen_reg_rtx (Pmode
);
4277 rtx block_off
= gen_reg_rtx (SImode
);
4278 rtx tag_addr
= gen_reg_rtx (Pmode
);
4279 rtx tag
= gen_reg_rtx (V4SImode
);
4280 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4281 rtx cache_tag_hi
= NULL_RTX
;
4282 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4283 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4284 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4285 rtx tag_equal_hi
= NULL_RTX
;
4286 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4287 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4288 rtx eq_index
= gen_reg_rtx (SImode
);
4289 rtx bcomp
, hit_label
, hit_ref
, cont_label
;
/* With a >32-bit __ea model the high halves of the tags are compared too.  */
4292 if (spu_ea_model
!= 32)
4294 splat_hi
= gen_reg_rtx (V4SImode
);
4295 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4296 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4299 emit_move_insn (index_mask
, plus_constant (Pmode
, tag_size_sym
, -128));
4300 emit_move_insn (tag_arr
, tag_arr_sym
);
/* Shuffle pattern that replicates word 0 into all four slots.  */
4301 v
= 0x0001020300010203LL
;
4302 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4303 ea_addr_si
= ea_addr
;
4304 if (spu_ea_model
!= 32)
4305 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4307 /* tag_index = ea_addr & (tag_array_size - 128) */
4308 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4310 /* splat ea_addr to all 4 slots. */
4311 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4312 /* Similarly for high 32 bits of ea_addr. */
4313 if (spu_ea_model
!= 32)
4314 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4316 /* block_off = ea_addr & 127 */
4317 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4319 /* tag_addr = tag_arr + tag_index */
4320 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4322 /* Read cache tags. */
4323 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4324 if (spu_ea_model
!= 32)
4325 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4326 plus_constant (Pmode
,
4329 /* tag = ea_addr & -128 */
4330 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4332 /* Read all four cache data pointers. */
4333 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4334 plus_constant (Pmode
,
4338 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4339 if (spu_ea_model
!= 32)
4341 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4342 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4345 /* At most one of the tags compare equal, so tag_equal has one
4346 32-bit slot set to all 1's, with the other slots all zero.
4347 gbb picks off low bit from each byte in the 128-bit registers,
4348 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4350 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4351 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4353 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4354 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4356 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4357 (rotating eq_index mod 16 bytes). */
4358 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4359 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4361 /* Add block offset to form final data address. */
4362 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4364 /* Check that we did hit. */
4365 hit_label
= gen_label_rtx ();
4366 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4367 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4368 insn
= emit_jump_insn (gen_rtx_SET (pc_rtx
,
4369 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4371 /* Say that this branch is very likely to happen. */
4372 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4373 add_int_reg_note (insn
, REG_BR_PROB
, v
);
/* Miss path: fall back to the out-of-line cache fetch.  */
4375 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4376 cont_label
= gen_label_rtx ();
4377 emit_jump_insn (gen_jump (cont_label
));
4380 emit_label (hit_label
);
/* Store path: mark the written bytes dirty in the cache metadata.  */
4385 rtx dirty_bits
= gen_reg_rtx (TImode
);
4386 rtx dirty_off
= gen_reg_rtx (SImode
);
4387 rtx dirty_128
= gen_reg_rtx (TImode
);
4388 rtx neg_block_off
= gen_reg_rtx (SImode
);
4390 /* Set up mask with one dirty bit per byte of the mem we are
4391 writing, starting from top bit. */
4393 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4394 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4399 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4401 /* Form index into cache dirty_bits. eq_index is one of
4402 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4403 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4404 offset to each of the four dirty_bits elements. */
4405 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4407 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4409 /* Rotate bit mask to proper bit. */
4410 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4411 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4412 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4414 /* Or in the new dirty bits. */
4415 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4418 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4421 emit_label (cont_label
);
/* Rewrite an __ea-address-space MEM as a local-store MEM backed by the
   software cache: emit the cache lookup (out-of-line when optimizing for
   size, inline otherwise) and return a fresh MEM at the cached data
   address with its own alias set.
   NOTE(review): garbled extraction -- declarations of ea_addr/new_mem, the
   return statement and brace structure are missing; code left
   byte-identical, comments only.  */
4425 expand_ea_mem (rtx mem
, bool is_store
)
4428 rtx data_addr
= gen_reg_rtx (Pmode
);
4431 ea_addr
= force_reg (EAmode
, XEXP (mem
, 0));
4432 if (optimize_size
|| optimize
== 0)
4433 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4435 ea_load_store_inline (mem
, is_store
, ea_addr
, data_addr
);
/* Create the __ea alias set lazily on first use.  */
4437 if (ea_alias_set
== -1)
4438 ea_alias_set
= new_alias_set ();
4440 /* We generate a new MEM RTX to refer to the copy of the data
4441 in the cache. We do not copy memory attributes (except the
4442 alignment) from the original MEM, as they may no longer apply
4443 to the cache copy. */
4444 new_mem
= gen_rtx_MEM (GET_MODE (mem
), data_addr
);
4445 set_mem_alias_set (new_mem
, ea_alias_set
);
4446 set_mem_align (new_mem
, MIN (MEM_ALIGN (mem
), 128 * 8));
/* Expander helper for mov<mode> patterns: normalizes invalid SUBREGs,
   forces one operand into a register, splits immediates during reload,
   sign-normalizes SImode constants, and routes __ea MEMs through
   expand_ea_mem before delegating to spu_split_store/spu_split_load.
   NOTE(review): garbled extraction -- return statements, brace structure
   and the MEM_P tests around the address-space checks are missing; code
   left byte-identical, comments only.  */
4452 spu_expand_mov (rtx
* ops
, machine_mode mode
)
4454 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4456 /* Perform the move in the destination SUBREG's inner mode. */
4457 ops
[0] = SUBREG_REG (ops
[0]);
4458 mode
= GET_MODE (ops
[0]);
4459 ops
[1] = gen_lowpart_common (mode
, ops
[1]);
4460 gcc_assert (ops
[1]);
4463 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4465 rtx from
= SUBREG_REG (ops
[1]);
4466 machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4468 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4469 && GET_MODE_CLASS (imode
) == MODE_INT
4470 && subreg_lowpart_p (ops
[1]));
4472 if (GET_MODE_SIZE (imode
) < 4)
4474 if (imode
!= GET_MODE (from
))
4475 from
= gen_rtx_SUBREG (imode
, from
, 0);
/* Truncate or extend depending on relative operand sizes.  */
4477 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4479 enum insn_code icode
= convert_optab_handler (trunc_optab
,
4481 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4484 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4488 /* At least one of the operands needs to be a register. */
4489 if ((reload_in_progress
| reload_completed
) == 0
4490 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4492 rtx temp
= force_reg (mode
, ops
[1]);
4493 emit_move_insn (ops
[0], temp
);
4496 if (reload_in_progress
|| reload_completed
)
4498 if (CONSTANT_P (ops
[1]))
4499 return spu_split_immediate (ops
);
4503 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4505 if (GET_CODE (ops
[1]) == CONST_INT
)
4507 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4508 if (val
!= INTVAL (ops
[1]))
4510 emit_move_insn (ops
[0], GEN_INT (val
));
/* __ea destination: go through the software cache before storing.  */
4516 if (MEM_ADDR_SPACE (ops
[0]))
4517 ops
[0] = expand_ea_mem (ops
[0], true);
4518 return spu_split_store (ops
);
/* __ea source: go through the software cache before loading.  */
4522 if (MEM_ADDR_SPACE (ops
[1]))
4523 ops
[1] = expand_ea_mem (ops
[1], false);
4524 return spu_split_load (ops
);
/* Move the preferred slot of TImode SRC into DST of a narrower mode:
   shift the value right (64 for DImode, 96 otherwise), truncate to the
   matching integer mode, then subreg-copy into DST when DST is not an
   integer-mode register itself.
   NOTE(review): garbled extraction -- the `rtx reg' declaration and braces
   are missing; code left byte-identical, comments only.  */
4531 spu_convert_move (rtx dst
, rtx src
)
4533 machine_mode mode
= GET_MODE (dst
);
4534 machine_mode int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
4536 gcc_assert (GET_MODE (src
) == TImode
);
4537 reg
= int_mode
!= mode
? gen_reg_rtx (int_mode
) : dst
;
4538 emit_insn (gen_rtx_SET (reg
,
4539 gen_rtx_TRUNCATE (int_mode
,
4540 gen_rtx_LSHIFTRT (TImode
, src
,
4541 GEN_INT (int_mode
== DImode
? 64 : 96)))));
4542 if (int_mode
!= mode
)
4544 reg
= simplify_gen_subreg (mode
, reg
, int_mode
, 0);
4545 emit_move_insn (dst
, reg
);
4549 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4550 the address from SRC and SRC+16. Return a REG or CONST_INT that
4551 specifies how many bytes to rotate the loaded registers, plus any
4552 extra from EXTRA_ROTQBY. The address and rotate amounts are
4553 normalized to improve merging of loads and rotate computations. */
/* Load TImode values into DST0 (and DST1 when non-NULL) from SRC and
   SRC+16, returning the rotate amount (REG or CONST_INT) needed to
   normalize the data, per the big comment above this function.
   NOTE(review): garbled extraction -- initializations of rot/rot_amt,
   brace structure and several address-case bodies are missing; code left
   byte-identical, comments only.  */
4555 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4557 rtx addr
= XEXP (src
, 0);
4558 rtx p0
, p1
, rot
, addr0
, addr1
;
4564 if (MEM_ALIGN (src
) >= 128)
4565 /* Address is already aligned; simply perform a TImode load. */ ;
4566 else if (GET_CODE (addr
) == PLUS
)
/* Case table for reg+reg / reg+const addresses (kept from original):  */
4569 aligned reg + aligned reg => lqx
4570 aligned reg + unaligned reg => lqx, rotqby
4571 aligned reg + aligned const => lqd
4572 aligned reg + unaligned const => lqd, rotqbyi
4573 unaligned reg + aligned reg => lqx, rotqby
4574 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4575 unaligned reg + aligned const => lqd, rotqby
4576 unaligned reg + unaligned const -> not allowed by legitimate address
4578 p0
= XEXP (addr
, 0);
4579 p1
= XEXP (addr
, 1);
4580 if (!reg_aligned_for_addr (p0
))
4582 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4584 rot
= gen_reg_rtx (SImode
);
4585 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4587 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4591 && INTVAL (p1
) * BITS_PER_UNIT
4592 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4594 rot
= gen_reg_rtx (SImode
);
4595 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4600 rtx x
= gen_reg_rtx (SImode
);
4601 emit_move_insn (x
, p1
);
4602 if (!spu_arith_operand (p1
, SImode
))
4604 rot
= gen_reg_rtx (SImode
);
4605 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4606 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
/* Aligned base register: only the constant offset contributes rotate.  */
4614 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4616 rot_amt
= INTVAL (p1
) & 15;
4617 if (INTVAL (p1
) & -16)
4619 p1
= GEN_INT (INTVAL (p1
) & -16);
4620 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4625 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4629 else if (REG_P (addr
))
4631 if (!reg_aligned_for_addr (addr
))
4634 else if (GET_CODE (addr
) == CONST
)
4636 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4637 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4638 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4640 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4642 addr
= gen_rtx_CONST (Pmode
,
4643 gen_rtx_PLUS (Pmode
,
4644 XEXP (XEXP (addr
, 0), 0),
4645 GEN_INT (rot_amt
& -16)));
4647 addr
= XEXP (XEXP (addr
, 0), 0);
4651 rot
= gen_reg_rtx (Pmode
);
4652 emit_move_insn (rot
, addr
);
4655 else if (GET_CODE (addr
) == CONST_INT
)
4657 rot_amt
= INTVAL (addr
);
4658 addr
= GEN_INT (rot_amt
& -16);
4660 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4662 rot
= gen_reg_rtx (Pmode
);
4663 emit_move_insn (rot
, addr
);
4666 rot_amt
+= extra_rotby
;
/* Fold a constant extra rotate into the register rotate amount.  */
4672 rtx x
= gen_reg_rtx (SImode
);
4673 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4677 if (!rot
&& rot_amt
)
4678 rot
= GEN_INT (rot_amt
);
/* Emit the aligned quadword load(s); addresses masked to -16.  */
4680 addr0
= copy_rtx (addr
);
4681 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4682 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4686 addr1
= plus_constant (SImode
, copy_rtx (addr
), 16);
4687 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4688 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
/* Split a sub-quadword load: either do a direct TImode load plus
   spu_convert_move when the address needs no split, or go through
   spu_expand_load and rotate the result into place.
   NOTE(review): garbled extraction -- return statements and braces are
   missing; code left byte-identical, comments only.  */
4695 spu_split_load (rtx
* ops
)
4697 machine_mode mode
= GET_MODE (ops
[0]);
4698 rtx addr
, load
, rot
;
/* Quadword or larger moves are not split here.  */
4701 if (GET_MODE_SIZE (mode
) >= 16)
4704 addr
= XEXP (ops
[1], 0);
4705 gcc_assert (GET_CODE (addr
) != AND
);
4707 if (!address_needs_split (ops
[1]))
4709 ops
[1] = change_address (ops
[1], TImode
, addr
);
4710 load
= gen_reg_rtx (TImode
);
4711 emit_insn (gen__movti (load
, ops
[1]));
4712 spu_convert_move (ops
[0], load
);
/* Sub-word values need extra rotate so they land in the preferred slot.  */
4716 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4718 load
= gen_reg_rtx (TImode
);
4719 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4722 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4724 spu_convert_move (ops
[0], load
);
/* Split a sub-quadword store into load-quadword / insert (cpat+shufb) /
   store-quadword, normalizing the address into reg+reg, reg+const or
   absolute (aform) shape first.
   NOTE(review): garbled extraction -- declarations (reg, aform, scalar),
   return statements and brace structure are missing; code left
   byte-identical, comments only.  */
4729 spu_split_store (rtx
* ops
)
4731 machine_mode mode
= GET_MODE (ops
[0]);
4733 rtx addr
, p0
, p1
, p1_lo
, smem
;
4737 if (GET_MODE_SIZE (mode
) >= 16)
4740 addr
= XEXP (ops
[0], 0);
4741 gcc_assert (GET_CODE (addr
) != AND
);
4743 if (!address_needs_split (ops
[0]))
4745 reg
= gen_reg_rtx (TImode
);
4746 emit_insn (gen_spu_convert (reg
, ops
[1]));
4747 ops
[0] = change_address (ops
[0], TImode
, addr
);
4748 emit_move_insn (ops
[0], reg
);
4752 if (GET_CODE (addr
) == PLUS
)
/* Case table for reg+reg / reg+const addresses (kept from original):  */
4755 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4756 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4757 aligned reg + aligned const => lqd, c?d, shuf, stqx
4758 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4759 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4760 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4761 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4762 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4765 p0
= XEXP (addr
, 0);
4766 p1
= p1_lo
= XEXP (addr
, 1);
4767 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4769 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4770 if (reg_aligned_for_addr (p0
))
4772 p1
= GEN_INT (INTVAL (p1
) & -16);
4773 if (p1
== const0_rtx
)
4776 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4780 rtx x
= gen_reg_rtx (SImode
);
4781 emit_move_insn (x
, p1
);
4782 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4786 else if (REG_P (addr
))
4790 p1
= p1_lo
= const0_rtx
;
/* Absolute-form address: pretend the base is the stack pointer.  */
4795 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4796 p1
= 0; /* aform doesn't use p1 */
4798 if (ALIGNED_SYMBOL_REF_P (addr
))
4800 else if (GET_CODE (addr
) == CONST
4801 && GET_CODE (XEXP (addr
, 0)) == PLUS
4802 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4803 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4805 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4807 addr
= gen_rtx_CONST (Pmode
,
4808 gen_rtx_PLUS (Pmode
,
4809 XEXP (XEXP (addr
, 0), 0),
4810 GEN_INT (v
& -16)));
4812 addr
= XEXP (XEXP (addr
, 0), 0);
4813 p1_lo
= GEN_INT (v
& 15);
4815 else if (GET_CODE (addr
) == CONST_INT
)
4817 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4818 addr
= GEN_INT (INTVAL (addr
) & -16);
4822 p1_lo
= gen_reg_rtx (SImode
);
4823 emit_move_insn (p1_lo
, addr
);
4827 gcc_assert (aform
== 0 || aform
== 1);
4828 reg
= gen_reg_rtx (TImode
);
4830 scalar
= store_with_one_insn_p (ops
[0]);
4833 /* We could copy the flags from the ops[0] MEM to mem here,
4834 We don't because we want this load to be optimized away if
4835 possible, and copying the flags will prevent that in certain
4836 cases, e.g. consider the volatile flag. */
4838 rtx pat
= gen_reg_rtx (TImode
);
4839 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4840 set_mem_alias_set (lmem
, 0);
4841 emit_insn (gen_movti (reg
, lmem
));
4843 if (!p0
|| reg_aligned_for_addr (p0
))
4844 p0
= stack_pointer_rtx
;
/* Generate the insertion mask, then merge the new value in.  */
4848 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4849 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4853 if (GET_CODE (ops
[1]) == REG
)
4854 emit_insn (gen_spu_convert (reg
, ops
[1]));
4855 else if (GET_CODE (ops
[1]) == SUBREG
)
4856 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
/* Left-justify sub-word scalars within the preferred word.  */
4861 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4862 emit_insn (gen_ashlti3
4863 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
4865 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4866 /* We can't use the previous alias set because the memory has changed
4867 size and can potentially overlap objects of other types. */
4868 set_mem_alias_set (smem
, 0);
4870 emit_insn (gen_movti (smem
, reg
));
4874 /* Return TRUE if X is MEM which is a struct member reference
4875 and the member can safely be loaded and stored with a single
4876 instruction because it is padded. */
/* Return TRUE if MEM X is a struct member reference (COMPONENT_REF of a
   FIELD_DECL in a RECORD_TYPE) whose 128-bit alignment and padding make a
   single-insn load/store safe.
   NOTE(review): garbled extraction -- return statements, the TREE_CHAIN
   advance and braces are missing; code left byte-identical,
   comments only.  */
4878 mem_is_padded_component_ref (rtx x
)
4880 tree t
= MEM_EXPR (x
);
4882 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
4884 t
= TREE_OPERAND (t
, 1);
4885 if (!t
|| TREE_CODE (t
) != FIELD_DECL
4886 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
4888 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4889 r
= DECL_FIELD_CONTEXT (t
);
4890 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
4892 /* Make sure they are the same mode */
4893 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
4895 /* If there are no following fields then the field alignment assures
4896 the structure is padded to the alignment which means this field is
4898 if (TREE_CHAIN (t
) == 0)
4900 /* If the following field is also aligned then this field will be
4903 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
4908 /* Parse the -mfixed-range= option string. */
/* Parse -mfixed-range=REG1-REG2{,REG1-REG2} and mark each register in the
   inclusive ranges as fixed and call-used.
   NOTE(review): garbled extraction -- the outer parse loop, error returns
   and braces are missing; code left byte-identical, comments only.  */
4910 fix_range (const char *const_str
)
4913 char *str
, *dash
, *comma
;
4915 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4916 REG2 are either register names or register numbers. The effect
4917 of this option is to mark the registers in the range from REG1 to
4918 REG2 as ``fixed'' so they won't be used by the compiler. */
/* Work on a writable copy of the option string.  */
4920 i
= strlen (const_str
);
4921 str
= (char *) alloca (i
+ 1);
4922 memcpy (str
, const_str
, i
+ 1);
4926 dash
= strchr (str
, '-');
4929 warning (0, "value of -mfixed-range must have form REG1-REG2");
4933 comma
= strchr (dash
+ 1, ',');
4937 first
= decode_reg_name (str
);
4940 warning (0, "unknown register name: %s", str
);
4944 last
= decode_reg_name (dash
+ 1);
4947 warning (0, "unknown register name: %s", dash
+ 1);
4955 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
4959 for (i
= first
; i
<= last
; ++i
)
4960 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4970 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4971 can be generated using the fsmbi instruction. */
/* Return TRUE if constant X can be generated with the fsmbi instruction
   (or fsmbi2 before the epilogue is emitted).
   NOTE(review): garbled extraction; code left byte-identical,
   comments only.  */
4973 fsmbi_const_p (rtx x
)
4977 /* We can always choose TImode for CONST_INT because the high bits
4978 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4979 enum immediate_class c
= classify_immediate (x
, TImode
);
4980 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
4985 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4986 can be generated using the cbd, chd, cwd or cdd instruction. */
/* Return TRUE if constant X can be generated with one of the cbd, chd,
   cwd or cdd generate-insertion-mask instructions.
   NOTE(review): garbled extraction; code left byte-identical,
   comments only.  */
4988 cpat_const_p (rtx x
, machine_mode mode
)
4992 enum immediate_class c
= classify_immediate (x
, mode
);
4993 return c
== IC_CPAT
;
/* Build the TImode shuffle constant a c?d insertion-mask instruction would
   produce for operands OPS (base, offset, size), or give up (presumably
   returning 0 -- the return lines are missing) when the operands are not
   constant enough or the base register's alignment is unknown.
   NOTE(review): garbled extraction -- the identity-byte fill loop, the
   isize==1/4/8 shift cases and braces are missing; code left
   byte-identical, comments only.  */
4999 gen_cpat_const (rtx
* ops
)
5001 unsigned char dst
[16];
5002 int i
, offset
, shift
, isize
;
5003 if (GET_CODE (ops
[3]) != CONST_INT
5004 || GET_CODE (ops
[2]) != CONST_INT
5005 || (GET_CODE (ops
[1]) != CONST_INT
5006 && GET_CODE (ops
[1]) != REG
))
5008 if (GET_CODE (ops
[1]) == REG
5009 && (!REG_POINTER (ops
[1])
5010 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
5013 for (i
= 0; i
< 16; i
++)
5015 isize
= INTVAL (ops
[3]);
5018 else if (isize
== 2)
/* Position of the inserted bytes within the quadword.  */
5022 offset
= (INTVAL (ops
[2]) +
5023 (GET_CODE (ops
[1]) ==
5024 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
5025 for (i
= 0; i
< isize
; i
++)
5026 dst
[offset
+ i
] = i
+ shift
;
5027 return array_to_constant (TImode
, dst
);
5030 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5031 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5032 than 16 bytes, the value is repeated across the rest of the array. */
/* Serialize constant X (CONST_INT, CONST_DOUBLE or CONST_VECTOR) into the
   16-byte big-endian array ARR, repeating narrow values across the array.
   NOTE(review): garbled extraction -- val/i/j/first declarations, the
   splat-copy assignment, val shifts and braces are missing; code left
   byte-identical, comments only.  */
5034 constant_to_array (machine_mode mode
, rtx x
, unsigned char arr
[16])
5039 memset (arr
, 0, 16);
5040 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
5041 if (GET_CODE (x
) == CONST_INT
5042 || (GET_CODE (x
) == CONST_DOUBLE
5043 && (mode
== SFmode
|| mode
== DFmode
)))
5045 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
5047 if (GET_CODE (x
) == CONST_DOUBLE
)
5048 val
= const_double_to_hwint (x
);
/* Store bytes most-significant first.  */
5051 first
= GET_MODE_SIZE (mode
) - 1;
5052 for (i
= first
; i
>= 0; i
--)
5054 arr
[i
] = val
& 0xff;
5057 /* Splat the constant across the whole array. */
5058 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
5061 j
= (j
== first
) ? 0 : j
+ 1;
/* Wide CONST_DOUBLE: high HWI in bytes 0-7, low HWI in bytes 8-15.  */
5064 else if (GET_CODE (x
) == CONST_DOUBLE
)
5066 val
= CONST_DOUBLE_LOW (x
);
5067 for (i
= 15; i
>= 8; i
--)
5069 arr
[i
] = val
& 0xff;
5072 val
= CONST_DOUBLE_HIGH (x
);
5073 for (i
= 7; i
>= 0; i
--)
5075 arr
[i
] = val
& 0xff;
/* Vectors: serialize each element at its lane offset.  */
5079 else if (GET_CODE (x
) == CONST_VECTOR
)
5083 mode
= GET_MODE_INNER (mode
);
5084 units
= CONST_VECTOR_NUNITS (x
);
5085 for (i
= 0; i
< units
; i
++)
5087 elt
= CONST_VECTOR_ELT (x
, i
);
5088 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
5090 if (GET_CODE (elt
) == CONST_DOUBLE
)
5091 val
= const_double_to_hwint (elt
);
5094 first
= GET_MODE_SIZE (mode
) - 1;
5095 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
5097 for (j
= first
; j
>= 0; j
--)
5099 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
5109 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5110 smaller than 16 bytes, use the bytes that would represent that value
5111 in a register, e.g., for QImode return the value of arr[3]. */
/* Inverse of constant_to_array: build an rtx constant of MODE from the
   16-byte array ARR, taking the bytes a register of that mode would
   occupy (e.g. arr[3] for QImode).
   NOTE(review): garbled extraction -- val/high declarations, the TImode
   and mode-dispatch structure and braces are missing; code left
   byte-identical, comments only.  */
5113 array_to_constant (machine_mode mode
, const unsigned char arr
[16])
5115 machine_mode inner_mode
;
5117 int units
, size
, i
, j
, k
;
/* Scalar integers that fit a host wide int.  */
5120 if (GET_MODE_CLASS (mode
) == MODE_INT
5121 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
5123 j
= GET_MODE_SIZE (mode
);
5124 i
= j
< 4 ? 4 - j
: 0;
5125 for (val
= 0; i
< j
; i
++)
5126 val
= (val
<< 8) | arr
[i
];
5127 val
= trunc_int_for_mode (val
, mode
);
5128 return GEN_INT (val
);
/* TImode: fold the 16 bytes into a high/low HWI pair.  */
5134 for (i
= high
= 0; i
< 8; i
++)
5135 high
= (high
<< 8) | arr
[i
];
5136 for (i
= 8, val
= 0; i
< 16; i
++)
5137 val
= (val
<< 8) | arr
[i
];
5138 return immed_double_const (val
, high
, TImode
);
/* SFmode from the first word.  */
5142 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5143 val
= trunc_int_for_mode (val
, SImode
);
5144 return hwint_to_const_double (SFmode
, val
);
/* DFmode from the first doubleword.  */
5148 for (i
= 0, val
= 0; i
< 8; i
++)
5149 val
= (val
<< 8) | arr
[i
];
5150 return hwint_to_const_double (DFmode
, val
);
5153 if (!VECTOR_MODE_P (mode
))
/* Vector modes: decode each lane into a CONST_VECTOR element.  */
5156 units
= GET_MODE_NUNITS (mode
);
5157 size
= GET_MODE_UNIT_SIZE (mode
);
5158 inner_mode
= GET_MODE_INNER (mode
);
5159 v
= rtvec_alloc (units
);
5161 for (k
= i
= 0; i
< units
; ++i
)
5164 for (j
= 0; j
< size
; j
++, k
++)
5165 val
= (val
<< 8) | arr
[k
];
5167 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
5168 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
5170 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
5175 return gen_rtx_CONST_VECTOR (mode
, v
);
/* Emit a warning or error (per -mwarn-reloc / -merror-reloc) when PIC code
   would require a run-time relocation for symbolic constant X, pointing at
   the offending decl when one can be recovered.
   NOTE(review): garbled extraction -- the decl/loc declarations, the
   warning_at/error_at calls for the decl case and braces are missing;
   code left byte-identical, comments only.  */
5179 reloc_diagnostic (rtx x
)
5182 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
5185 if (GET_CODE (x
) == SYMBOL_REF
)
5186 decl
= SYMBOL_REF_DECL (x
);
5187 else if (GET_CODE (x
) == CONST
5188 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5189 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
5191 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5192 if (decl
&& !DECL_P (decl
))
5195 /* The decl could be a string constant. */
5196 if (decl
&& DECL_P (decl
))
5199 /* We use last_assemble_variable_decl to get line information. It's
5200 not always going to be right and might not even be close, but will
5201 be right for the more common cases. */
5202 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
5203 loc
= DECL_SOURCE_LOCATION (decl
);
5205 loc
= DECL_SOURCE_LOCATION (last_assemble_variable_decl
);
5207 if (TARGET_WARN_RELOC
)
5209 "creating run-time relocation for %qD", decl
);
5212 "creating run-time relocation for %qD", decl
);
/* No decl available: diagnose at input_location instead.  */
5216 if (TARGET_WARN_RELOC
)
5217 warning_at (input_location
, 0, "creating run-time relocation");
5219 error_at (input_location
, "creating run-time relocation");
5223 /* Hook into assemble_integer so we can generate an error for run-time
5224 relocations. The SPU ABI disallows them. */
/* Implements TARGET_ASM_INTEGER: diagnose symbolic constants that would
   need a run-time relocation (disallowed by the SPU ABI), then emit via
   the default hook.
   NOTE(review): garbled extraction; code left byte-identical,
   comments only.  */
5226 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
5228 /* By default run-time relocations aren't supported, but we allow them
5229 in case users support it in their own run-time loader. And we provide
5230 a warning for those users that don't. */
5231 if ((GET_CODE (x
) == SYMBOL_REF
)
5232 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
5233 reloc_diagnostic (x
);
5235 return default_assemble_integer (x
, size
, aligned_p
);
/* Implements TARGET_ASM_GLOBALIZE_LABEL: emit a ".global NAME" directive.
   NOTE(review): garbled extraction -- the trailing newline fputs is
   missing; code left byte-identical, comments only.  */
5239 spu_asm_globalize_label (FILE * file
, const char *name
)
5241 fputs ("\t.global\t", file
);
5242 assemble_name (file
, name
);
/* Implements TARGET_RTX_COSTS: estimate insn counts for SPU operations
   and scale wide integer modes by their size relative to SImode.
   NOTE(review): garbled extraction -- the switch statement, case labels
   and return lines are missing, leaving only the per-case cost
   assignments; code left byte-identical, comments only.  */
5247 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
,
5248 int opno ATTRIBUTE_UNUSED
, int *total
,
5249 bool speed ATTRIBUTE_UNUSED
)
5251 machine_mode mode
= GET_MODE (x
);
5252 int cost
= COSTS_N_INSNS (2);
5254 /* Folding to a CONST_VECTOR will use extra space but there might
5255 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5256 only if it allows us to fold away multiple insns. Changing the cost
5257 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5258 because this cost will only be compared against a single insn.
5259 if (code == CONST_VECTOR)
5260 return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
5263 /* Use defaults for float operations. Not accurate but good enough. */
5266 *total
= COSTS_N_INSNS (13);
5271 *total
= COSTS_N_INSNS (6);
/* CONST_INT: cheap if it satisfies constraint K, cheap-ish if 32-bit.  */
5277 if (satisfies_constraint_K (x
))
5279 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5280 *total
= COSTS_N_INSNS (1);
5282 *total
= COSTS_N_INSNS (3);
5286 *total
= COSTS_N_INSNS (3);
5291 *total
= COSTS_N_INSNS (0);
5295 *total
= COSTS_N_INSNS (5);
5299 case FLOAT_TRUNCATE
:
5301 case UNSIGNED_FLOAT
:
5304 *total
= COSTS_N_INSNS (7);
5310 *total
= COSTS_N_INSNS (9);
/* Multiply: cheaper with a register operand or special constants.  */
5317 GET_CODE (XEXP (x
, 0)) ==
5318 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5319 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5321 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5323 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5324 cost
= COSTS_N_INSNS (14);
5325 if ((val
& 0xffff) == 0)
5326 cost
= COSTS_N_INSNS (9);
5327 else if (val
> 0 && val
< 0x10000)
5328 cost
= COSTS_N_INSNS (11);
5337 *total
= COSTS_N_INSNS (20);
5344 *total
= COSTS_N_INSNS (4);
/* UNSPEC_CONVERT is a no-op register view change.  */
5347 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5348 *total
= COSTS_N_INSNS (0);
5350 *total
= COSTS_N_INSNS (4);
5353 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5354 if (GET_MODE_CLASS (mode
) == MODE_INT
5355 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5356 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5357 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
/* Implements TARGET_UNWIND_WORD_MODE.
   NOTE(review): garbled extraction -- the body (original lines 5364-5365)
   is missing entirely; only the signature survives.  */
5363 spu_unwind_word_mode (void)
5368 /* Decide whether we can make a sibling call to a function. DECL is the
5369 declaration of the function being targeted by the call and EXP is the
5370 CALL_EXPR representing the call. */
/* Implements TARGET_FUNCTION_OK_FOR_SIBCALL: allow sibcalls only to known
   decls and only when not compiling for the large memory model.
   NOTE(review): garbled extraction; code left byte-identical,
   comments only.  */
5372 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5374 return decl
&& !TARGET_LARGE_MEM
;
5377 /* We need to correctly update the back chain pointer and the Available
5378 Stack Size (which is in the second slot of the sp register.) */
/* Expander for allocate_stack: subtract OP1 from both SP words (stack
   pointer and Available Stack Size), preserving the back-chain word, with
   an optional -fstack-check trap on overflow.
   NOTE(review): garbled extraction -- the declaration of v and brace
   structure are missing; code left byte-identical, comments only.  */
5380 spu_allocate_stack (rtx op0
, rtx op1
)
5383 rtx chain
= gen_reg_rtx (V4SImode
);
5384 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5385 rtx sp
= gen_reg_rtx (V4SImode
);
5386 rtx splatted
= gen_reg_rtx (V4SImode
);
5387 rtx pat
= gen_reg_rtx (TImode
);
5389 /* copy the back chain so we can save it back again. */
5390 emit_move_insn (chain
, stack_bot
);
5392 op1
= force_reg (SImode
, op1
);
/* Shuffle pattern replicating word 0 into all four slots.  */
5394 v
= 0x1020300010203ll
;
5395 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5396 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5398 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5399 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
/* With -fstack-check, halt if the Available Stack Size went negative.  */
5401 if (flag_stack_check
)
5403 rtx avail
= gen_reg_rtx(SImode
);
5404 rtx result
= gen_reg_rtx(SImode
);
5405 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
5406 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5407 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5410 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
5412 emit_move_insn (stack_bot
, chain
);
5414 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
5418 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5420 static unsigned char arr
[16] =
5421 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5422 rtx temp
= gen_reg_rtx (SImode
);
5423 rtx temp2
= gen_reg_rtx (SImode
);
5424 rtx temp3
= gen_reg_rtx (V4SImode
);
5425 rtx temp4
= gen_reg_rtx (V4SImode
);
5426 rtx pat
= gen_reg_rtx (TImode
);
5427 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5429 /* Restore the backchain from the first word, sp from the second. */
5430 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5431 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5433 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5435 /* Compute Available Stack Size for sp */
5436 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5437 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5439 /* Compute Available Stack Size for back chain */
5440 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5441 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5442 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
5444 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5445 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
5449 spu_init_libfuncs (void)
5451 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
5452 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
5453 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
5454 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
5455 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
5456 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
5457 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
5458 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
5459 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
5460 set_optab_libfunc (clrsb_optab
, DImode
, "__clrsbdi2");
5461 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
5462 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
5464 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
5465 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
5467 set_optab_libfunc (addv_optab
, SImode
, "__addvsi3");
5468 set_optab_libfunc (subv_optab
, SImode
, "__subvsi3");
5469 set_optab_libfunc (smulv_optab
, SImode
, "__mulvsi3");
5470 set_optab_libfunc (sdivv_optab
, SImode
, "__divvsi3");
5471 set_optab_libfunc (negv_optab
, SImode
, "__negvsi2");
5472 set_optab_libfunc (absv_optab
, SImode
, "__absvsi2");
5473 set_optab_libfunc (addv_optab
, DImode
, "__addvdi3");
5474 set_optab_libfunc (subv_optab
, DImode
, "__subvdi3");
5475 set_optab_libfunc (smulv_optab
, DImode
, "__mulvdi3");
5476 set_optab_libfunc (sdivv_optab
, DImode
, "__divvdi3");
5477 set_optab_libfunc (negv_optab
, DImode
, "__negvdi2");
5478 set_optab_libfunc (absv_optab
, DImode
, "__absvdi2");
5480 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
5481 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
5482 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
5483 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
5484 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
5485 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
5488 /* Make a subreg, stripping any existing subreg. We could possibly just
5489 call simplify_subreg, but in this case we know what we want. */
5491 spu_gen_subreg (machine_mode mode
, rtx x
)
5493 if (GET_CODE (x
) == SUBREG
)
5495 if (GET_MODE (x
) == mode
)
5497 return gen_rtx_SUBREG (mode
, x
, 0);
5501 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5503 return (TYPE_MODE (type
) == BLKmode
5505 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5506 || int_size_in_bytes (type
) >
5507 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
5510 /* Create the built-in types and functions */
5512 enum spu_function_code
5514 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5515 #include "spu-builtins.def"
5520 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5522 struct spu_builtin_description spu_builtins
[] = {
5523 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5524 {fcode, icode, name, type, params},
5525 #include "spu-builtins.def"
5529 static GTY(()) tree spu_builtin_decls
[NUM_SPU_BUILTINS
];
5531 /* Returns the spu builtin decl for CODE. */
5534 spu_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
5536 if (code
>= NUM_SPU_BUILTINS
)
5537 return error_mark_node
;
5539 return spu_builtin_decls
[code
];
5544 spu_init_builtins (void)
5546 struct spu_builtin_description
*d
;
5549 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5550 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5551 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5552 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5553 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5554 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5556 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5557 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5558 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5559 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5561 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5563 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5564 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5565 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5566 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5567 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5568 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5569 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5570 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5571 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5572 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5573 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5574 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5576 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5577 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5578 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5579 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5580 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5581 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5582 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5583 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5585 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5586 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5588 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5590 spu_builtin_types
[SPU_BTI_PTR
] =
5591 build_pointer_type (build_qualified_type
5593 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5595 /* For each builtin we build a new prototype. The tree code will make
5596 sure nodes are shared. */
5597 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5600 char name
[64]; /* build_function will make a copy. */
5606 /* Find last parm. */
5607 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5612 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5614 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5616 sprintf (name
, "__builtin_%s", d
->name
);
5617 spu_builtin_decls
[i
] =
5618 add_builtin_function (name
, p
, i
, BUILT_IN_MD
, NULL
, NULL_TREE
);
5619 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5620 TREE_READONLY (spu_builtin_decls
[i
]) = 1;
5622 /* These builtins don't throw. */
5623 TREE_NOTHROW (spu_builtin_decls
[i
]) = 1;
5628 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5630 static unsigned char arr
[16] =
5631 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5633 rtx temp
= gen_reg_rtx (Pmode
);
5634 rtx temp2
= gen_reg_rtx (V4SImode
);
5635 rtx temp3
= gen_reg_rtx (V4SImode
);
5636 rtx pat
= gen_reg_rtx (TImode
);
5637 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5639 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5641 /* Restore the sp. */
5642 emit_move_insn (temp
, op1
);
5643 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5645 /* Compute available stack size for sp. */
5646 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5647 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5649 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5650 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
5654 spu_safe_dma (HOST_WIDE_INT channel
)
5656 return TARGET_SAFE_DMA
&& channel
>= 21 && channel
<= 27;
5660 spu_builtin_splats (rtx ops
[])
5662 machine_mode mode
= GET_MODE (ops
[0]);
5663 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5665 unsigned char arr
[16];
5666 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5667 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5671 rtx reg
= gen_reg_rtx (TImode
);
5673 if (GET_CODE (ops
[1]) != REG
5674 && GET_CODE (ops
[1]) != SUBREG
)
5675 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5681 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5687 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5692 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5697 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5703 emit_move_insn (reg
, shuf
);
5704 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
5709 spu_builtin_extract (rtx ops
[])
5714 mode
= GET_MODE (ops
[1]);
5716 if (GET_CODE (ops
[2]) == CONST_INT
)
5721 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5724 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5727 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5730 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5733 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5736 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5744 from
= spu_gen_subreg (TImode
, ops
[1]);
5745 rot
= gen_reg_rtx (TImode
);
5746 tmp
= gen_reg_rtx (SImode
);
5751 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5754 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5755 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5759 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5763 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5768 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5770 emit_insn (gen_spu_convert (ops
[0], rot
));
5774 spu_builtin_insert (rtx ops
[])
5776 machine_mode mode
= GET_MODE (ops
[0]);
5777 machine_mode imode
= GET_MODE_INNER (mode
);
5778 rtx mask
= gen_reg_rtx (TImode
);
5781 if (GET_CODE (ops
[3]) == CONST_INT
)
5782 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5785 offset
= gen_reg_rtx (SImode
);
5786 emit_insn (gen_mulsi3
5787 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5790 (mask
, stack_pointer_rtx
, offset
,
5791 GEN_INT (GET_MODE_SIZE (imode
))));
5792 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5796 spu_builtin_promote (rtx ops
[])
5798 machine_mode mode
, imode
;
5799 rtx rot
, from
, offset
;
5802 mode
= GET_MODE (ops
[0]);
5803 imode
= GET_MODE_INNER (mode
);
5805 from
= gen_reg_rtx (TImode
);
5806 rot
= spu_gen_subreg (TImode
, ops
[0]);
5808 emit_insn (gen_spu_convert (from
, ops
[1]));
5810 if (GET_CODE (ops
[2]) == CONST_INT
)
5812 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5813 if (GET_MODE_SIZE (imode
) < 4)
5814 pos
+= 4 - GET_MODE_SIZE (imode
);
5815 offset
= GEN_INT (pos
& 15);
5819 offset
= gen_reg_rtx (SImode
);
5823 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5826 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5827 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5831 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5832 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5836 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5842 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5846 spu_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
5848 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5849 rtx shuf
= gen_reg_rtx (V4SImode
);
5850 rtx insn
= gen_reg_rtx (V4SImode
);
5855 fnaddr
= force_reg (SImode
, fnaddr
);
5856 cxt
= force_reg (SImode
, cxt
);
5858 if (TARGET_LARGE_MEM
)
5860 rtx rotl
= gen_reg_rtx (V4SImode
);
5861 rtx mask
= gen_reg_rtx (V4SImode
);
5862 rtx bi
= gen_reg_rtx (SImode
);
5863 static unsigned char const shufa
[16] = {
5864 2, 3, 0, 1, 18, 19, 16, 17,
5865 0, 1, 2, 3, 16, 17, 18, 19
5867 static unsigned char const insna
[16] = {
5869 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5871 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5874 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5875 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5877 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5878 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5879 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5880 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5882 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5883 emit_move_insn (mem
, insn
);
5885 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5886 mem
= adjust_address (m_tramp
, Pmode
, 16);
5887 emit_move_insn (mem
, bi
);
5891 rtx scxt
= gen_reg_rtx (SImode
);
5892 rtx sfnaddr
= gen_reg_rtx (SImode
);
5893 static unsigned char const insna
[16] = {
5894 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5900 shufc
= gen_reg_rtx (TImode
);
5901 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5903 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5904 fits 18 bits and the last 4 are zeros. This will be true if
5905 the stack pointer is initialized to 0x3fff0 at program start,
5906 otherwise the ila instruction will be garbage. */
5908 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5909 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5911 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5912 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5913 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5915 mem
= adjust_address (m_tramp
, V4SImode
, 0);
5916 emit_move_insn (mem
, insn
);
5918 emit_insn (gen_sync ());
5922 spu_warn_func_return (tree decl
)
5924 /* Naked functions are implemented entirely in assembly, including the
5925 return sequence, so suppress warnings about this. */
5926 return !spu_naked_function_p (decl
);
5930 spu_expand_sign_extend (rtx ops
[])
5932 unsigned char arr
[16];
5933 rtx pat
= gen_reg_rtx (TImode
);
5936 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5937 if (GET_MODE (ops
[1]) == QImode
)
5939 sign
= gen_reg_rtx (HImode
);
5940 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5941 for (i
= 0; i
< 16; i
++)
5947 for (i
= 0; i
< 16; i
++)
5949 switch (GET_MODE (ops
[1]))
5952 sign
= gen_reg_rtx (SImode
);
5953 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5955 arr
[last
- 1] = 0x02;
5958 sign
= gen_reg_rtx (SImode
);
5959 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5960 for (i
= 0; i
< 4; i
++)
5961 arr
[last
- i
] = 3 - i
;
5964 sign
= gen_reg_rtx (SImode
);
5965 c
= gen_reg_rtx (SImode
);
5966 emit_insn (gen_spu_convert (c
, ops
[1]));
5967 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5968 for (i
= 0; i
< 8; i
++)
5969 arr
[last
- i
] = 7 - i
;
5975 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5976 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
5979 /* expand vector initialization. If there are any constant parts,
5980 load constant parts first. Then load any non-constant parts. */
5982 spu_expand_vector_init (rtx target
, rtx vals
)
5984 machine_mode mode
= GET_MODE (target
);
5985 int n_elts
= GET_MODE_NUNITS (mode
);
5987 bool all_same
= true;
5988 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5991 first
= XVECEXP (vals
, 0, 0);
5992 for (i
= 0; i
< n_elts
; ++i
)
5994 x
= XVECEXP (vals
, 0, i
);
5995 if (!(CONST_INT_P (x
)
5996 || GET_CODE (x
) == CONST_DOUBLE
5997 || GET_CODE (x
) == CONST_FIXED
))
6001 if (first_constant
== NULL_RTX
)
6004 if (i
> 0 && !rtx_equal_p (x
, first
))
6008 /* if all elements are the same, use splats to repeat elements */
6011 if (!CONSTANT_P (first
)
6012 && !register_operand (first
, GET_MODE (x
)))
6013 first
= force_reg (GET_MODE (first
), first
);
6014 emit_insn (gen_spu_splats (target
, first
));
6018 /* load constant parts */
6019 if (n_var
!= n_elts
)
6023 emit_move_insn (target
,
6024 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
6028 rtx constant_parts_rtx
= copy_rtx (vals
);
6030 gcc_assert (first_constant
!= NULL_RTX
);
6031 /* fill empty slots with the first constant, this increases
6032 our chance of using splats in the recursive call below. */
6033 for (i
= 0; i
< n_elts
; ++i
)
6035 x
= XVECEXP (constant_parts_rtx
, 0, i
);
6036 if (!(CONST_INT_P (x
)
6037 || GET_CODE (x
) == CONST_DOUBLE
6038 || GET_CODE (x
) == CONST_FIXED
))
6039 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
6042 spu_expand_vector_init (target
, constant_parts_rtx
);
6046 /* load variable parts */
6049 rtx insert_operands
[4];
6051 insert_operands
[0] = target
;
6052 insert_operands
[2] = target
;
6053 for (i
= 0; i
< n_elts
; ++i
)
6055 x
= XVECEXP (vals
, 0, i
);
6056 if (!(CONST_INT_P (x
)
6057 || GET_CODE (x
) == CONST_DOUBLE
6058 || GET_CODE (x
) == CONST_FIXED
))
6060 if (!register_operand (x
, GET_MODE (x
)))
6061 x
= force_reg (GET_MODE (x
), x
);
6062 insert_operands
[1] = x
;
6063 insert_operands
[3] = GEN_INT (i
);
6064 spu_builtin_insert (insert_operands
);
6070 /* Return insn index for the vector compare instruction for given CODE,
6071 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6074 get_vec_cmp_insn (enum rtx_code code
,
6075 machine_mode dest_mode
,
6076 machine_mode op_mode
)
6082 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6083 return CODE_FOR_ceq_v16qi
;
6084 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6085 return CODE_FOR_ceq_v8hi
;
6086 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6087 return CODE_FOR_ceq_v4si
;
6088 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6089 return CODE_FOR_ceq_v4sf
;
6090 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6091 return CODE_FOR_ceq_v2df
;
6094 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6095 return CODE_FOR_cgt_v16qi
;
6096 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6097 return CODE_FOR_cgt_v8hi
;
6098 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6099 return CODE_FOR_cgt_v4si
;
6100 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6101 return CODE_FOR_cgt_v4sf
;
6102 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6103 return CODE_FOR_cgt_v2df
;
6106 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6107 return CODE_FOR_clgt_v16qi
;
6108 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6109 return CODE_FOR_clgt_v8hi
;
6110 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6111 return CODE_FOR_clgt_v4si
;
6119 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6120 DMODE is expected destination mode. This is a recursive function. */
6123 spu_emit_vector_compare (enum rtx_code rcode
,
6129 machine_mode dest_mode
;
6130 machine_mode op_mode
= GET_MODE (op1
);
6132 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
6134 /* Floating point vector compare instructions uses destination V4SImode.
6135 Double floating point vector compare instructions uses destination V2DImode.
6136 Move destination to appropriate mode later. */
6137 if (dmode
== V4SFmode
)
6138 dest_mode
= V4SImode
;
6139 else if (dmode
== V2DFmode
)
6140 dest_mode
= V2DImode
;
6144 mask
= gen_reg_rtx (dest_mode
);
6145 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6147 if (vec_cmp_insn
== -1)
6149 bool swap_operands
= false;
6150 bool try_again
= false;
6155 swap_operands
= true;
6160 swap_operands
= true;
6170 /* Treat A != B as ~(A==B). */
6172 enum rtx_code rev_code
;
6173 enum insn_code nor_code
;
6176 rev_code
= reverse_condition_maybe_unordered (rcode
);
6177 rev_mask
= spu_emit_vector_compare (rev_code
, op0
, op1
, dest_mode
);
6179 nor_code
= optab_handler (one_cmpl_optab
, dest_mode
);
6180 gcc_assert (nor_code
!= CODE_FOR_nothing
);
6181 emit_insn (GEN_FCN (nor_code
) (mask
, rev_mask
));
6182 if (dmode
!= dest_mode
)
6184 rtx temp
= gen_reg_rtx (dest_mode
);
6185 convert_move (temp
, mask
, 0);
6195 /* Try GT/GTU/LT/LTU OR EQ */
6198 enum insn_code ior_code
;
6199 enum rtx_code new_code
;
6203 case GE
: new_code
= GT
; break;
6204 case GEU
: new_code
= GTU
; break;
6205 case LE
: new_code
= LT
; break;
6206 case LEU
: new_code
= LTU
; break;
6211 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
6212 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6214 ior_code
= optab_handler (ior_optab
, dest_mode
);
6215 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6216 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
6217 if (dmode
!= dest_mode
)
6219 rtx temp
= gen_reg_rtx (dest_mode
);
6220 convert_move (temp
, mask
, 0);
6230 enum insn_code ior_code
;
6232 lt_rtx
= spu_emit_vector_compare (LT
, op0
, op1
, dest_mode
);
6233 gt_rtx
= spu_emit_vector_compare (GT
, op0
, op1
, dest_mode
);
6235 ior_code
= optab_handler (ior_optab
, dest_mode
);
6236 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6237 emit_insn (GEN_FCN (ior_code
) (mask
, lt_rtx
, gt_rtx
));
6238 if (dmode
!= dest_mode
)
6240 rtx temp
= gen_reg_rtx (dest_mode
);
6241 convert_move (temp
, mask
, 0);
6248 /* Implement as (A==A) & (B==B) */
6251 enum insn_code and_code
;
6253 a_rtx
= spu_emit_vector_compare (EQ
, op0
, op0
, dest_mode
);
6254 b_rtx
= spu_emit_vector_compare (EQ
, op1
, op1
, dest_mode
);
6256 and_code
= optab_handler (and_optab
, dest_mode
);
6257 gcc_assert (and_code
!= CODE_FOR_nothing
);
6258 emit_insn (GEN_FCN (and_code
) (mask
, a_rtx
, b_rtx
));
6259 if (dmode
!= dest_mode
)
6261 rtx temp
= gen_reg_rtx (dest_mode
);
6262 convert_move (temp
, mask
, 0);
6272 /* You only get two chances. */
6274 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6276 gcc_assert (vec_cmp_insn
!= -1);
6287 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
6288 if (dmode
!= dest_mode
)
6290 rtx temp
= gen_reg_rtx (dest_mode
);
6291 convert_move (temp
, mask
, 0);
6298 /* Emit vector conditional expression.
6299 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6300 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6303 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
6304 rtx cond
, rtx cc_op0
, rtx cc_op1
)
6306 machine_mode dest_mode
= GET_MODE (dest
);
6307 enum rtx_code rcode
= GET_CODE (cond
);
6310 /* Get the vector mask for the given relational operations. */
6311 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
6313 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
6319 spu_force_reg (machine_mode mode
, rtx op
)
6322 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
6324 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
6325 || GET_MODE (op
) == BLKmode
)
6326 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
6330 r
= force_reg (GET_MODE (op
), op
);
6331 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6333 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6338 x
= gen_reg_rtx (mode
);
6339 emit_insn (gen_spu_convert (x
, r
));
6344 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6346 HOST_WIDE_INT v
= 0;
6348 /* Check the range of immediate operands. */
6349 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6351 int range
= p
- SPU_BTI_7
;
6353 if (!CONSTANT_P (op
))
6354 error ("%s expects an integer literal in the range [%d, %d]",
6356 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6358 if (GET_CODE (op
) == CONST
6359 && (GET_CODE (XEXP (op
, 0)) == PLUS
6360 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6362 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6363 op
= XEXP (XEXP (op
, 0), 0);
6365 else if (GET_CODE (op
) == CONST_INT
)
6367 else if (GET_CODE (op
) == CONST_VECTOR
6368 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6369 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6371 /* The default for v is 0 which is valid in every range. */
6372 if (v
< spu_builtin_range
[range
].low
6373 || v
> spu_builtin_range
[range
].high
)
6374 error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
6376 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6385 /* This is only used in lqa, and stqa. Even though the insns
6386 encode 16 bits of the address (all but the 2 least
6387 significant), only 14 bits are used because it is masked to
6388 be 16 byte aligned. */
6392 /* This is used for lqr and stqr. */
6399 if (GET_CODE (op
) == LABEL_REF
6400 || (GET_CODE (op
) == SYMBOL_REF
6401 && SYMBOL_REF_FUNCTION_P (op
))
6402 || (v
& ((1 << lsbits
) - 1)) != 0)
6403 warning (0, "%d least significant bits of %s are ignored", lsbits
,
6410 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6411 rtx target
, rtx ops
[])
6413 enum insn_code icode
= (enum insn_code
) d
->icode
;
6416 /* Expand the arguments into rtl. */
6418 if (d
->parm
[0] != SPU_BTI_VOID
)
6421 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6423 tree arg
= CALL_EXPR_ARG (exp
, a
);
6426 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6429 gcc_assert (i
== insn_data
[icode
].n_generator_args
);
6434 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6435 tree exp
, rtx target
)
6439 enum insn_code icode
= (enum insn_code
) d
->icode
;
6440 machine_mode mode
, tmode
;
6445 /* Set up ops[] with values from arglist. */
6446 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6448 /* Handle the target operand which must be operand 0. */
6450 if (d
->parm
[0] != SPU_BTI_VOID
)
6453 /* We prefer the mode specified for the match_operand otherwise
6454 use the mode from the builtin function prototype. */
6455 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6456 if (tmode
== VOIDmode
)
6457 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6459 /* Try to use target because not using it can lead to extra copies
6460 and when we are using all of the registers extra copies leads
6462 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6465 target
= ops
[0] = gen_reg_rtx (tmode
);
6467 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6473 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6475 machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6480 arg
= CALL_EXPR_ARG (exp
, 0);
6481 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
6482 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6483 addr
= memory_address (mode
, op
);
6486 op
= gen_reg_rtx (GET_MODE (addr
));
6487 emit_insn (gen_rtx_SET (op
, gen_rtx_NEG (GET_MODE (addr
), addr
)));
6488 op
= gen_rtx_MEM (mode
, op
);
6490 pat
= GEN_FCN (icode
) (target
, op
);
6497 /* Ignore align_hint, but still expand it's args in case they have
6499 if (icode
== CODE_FOR_spu_align_hint
)
6502 /* Handle the rest of the operands. */
6503 for (p
= 1; i
< n_operands
; i
++, p
++)
6505 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6506 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6508 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6510 /* mode can be VOIDmode here for labels */
6512 /* For specific intrinsics with an immediate operand, e.g.,
6513 si_ai(), we sometimes need to convert the scalar argument to a
6514 vector argument by splatting the scalar. */
6515 if (VECTOR_MODE_P (mode
)
6516 && (GET_CODE (ops
[i
]) == CONST_INT
6517 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6518 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6520 if (GET_CODE (ops
[i
]) == CONST_INT
)
6521 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6524 rtx reg
= gen_reg_rtx (mode
);
6525 machine_mode imode
= GET_MODE_INNER (mode
);
6526 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6527 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6528 if (imode
!= GET_MODE (ops
[i
]))
6529 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6530 TYPE_UNSIGNED (spu_builtin_types
6532 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6537 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6539 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6540 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6546 pat
= GEN_FCN (icode
) (0);
6549 pat
= GEN_FCN (icode
) (ops
[0]);
6552 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6555 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6558 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6561 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6564 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6573 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6574 emit_call_insn (pat
);
6575 else if (d
->type
== B_JUMP
)
6577 emit_jump_insn (pat
);
6583 return_type
= spu_builtin_types
[d
->parm
[0]];
6584 if (d
->parm
[0] != SPU_BTI_VOID
6585 && GET_MODE (target
) != TYPE_MODE (return_type
))
6587 /* target is the return value. It should always be the mode of
6588 the builtin function prototype. */
6589 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6596 spu_expand_builtin (tree exp
,
6598 rtx subtarget ATTRIBUTE_UNUSED
,
6599 machine_mode mode ATTRIBUTE_UNUSED
,
6600 int ignore ATTRIBUTE_UNUSED
)
6602 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6603 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
6604 struct spu_builtin_description
*d
;
6606 if (fcode
< NUM_SPU_BUILTINS
)
6608 d
= &spu_builtins
[fcode
];
6610 return spu_expand_builtin_1 (d
, exp
, target
);
6615 /* Implement targetm.vectorize.builtin_mask_for_load. */
6617 spu_builtin_mask_for_load (void)
6619 return spu_builtin_decls
[SPU_MASK_FOR_LOAD
];
6622 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6624 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6626 int misalign ATTRIBUTE_UNUSED
)
6630 switch (type_of_cost
)
6638 case cond_branch_not_taken
:
6640 case vec_promote_demote
:
6647 /* Load + rotate. */
6650 case unaligned_load
:
6653 case cond_branch_taken
:
6657 elements
= TYPE_VECTOR_SUBPARTS (vectype
);
6658 return elements
/ 2 + 1;
6665 /* Implement targetm.vectorize.init_cost. */
6668 spu_init_cost (struct loop
*loop_info ATTRIBUTE_UNUSED
)
6670 unsigned *cost
= XNEWVEC (unsigned, 3);
6671 cost
[vect_prologue
] = cost
[vect_body
] = cost
[vect_epilogue
] = 0;
6675 /* Implement targetm.vectorize.add_stmt_cost. */
6678 spu_add_stmt_cost (void *data
, int count
, enum vect_cost_for_stmt kind
,
6679 struct _stmt_vec_info
*stmt_info
, int misalign
,
6680 enum vect_cost_model_location where
)
6682 unsigned *cost
= (unsigned *) data
;
6683 unsigned retval
= 0;
6685 if (flag_vect_cost_model
)
6687 tree vectype
= stmt_info
? stmt_vectype (stmt_info
) : NULL_TREE
;
6688 int stmt_cost
= spu_builtin_vectorization_cost (kind
, vectype
, misalign
);
6690 /* Statements in an inner loop relative to the loop being
6691 vectorized are weighted more heavily. The value here is
6692 arbitrary and could potentially be improved with analysis. */
6693 if (where
== vect_body
&& stmt_info
&& stmt_in_inner_loop_p (stmt_info
))
6694 count
*= 50; /* FIXME. */
6696 retval
= (unsigned) (count
* stmt_cost
);
6697 cost
[where
] += retval
;
6703 /* Implement targetm.vectorize.finish_cost. */
6706 spu_finish_cost (void *data
, unsigned *prologue_cost
,
6707 unsigned *body_cost
, unsigned *epilogue_cost
)
6709 unsigned *cost
= (unsigned *) data
;
6710 *prologue_cost
= cost
[vect_prologue
];
6711 *body_cost
= cost
[vect_body
];
6712 *epilogue_cost
= cost
[vect_epilogue
];
/* Implement targetm.vectorize.destroy_cost_data.  Frees the accumulator
   allocated by spu_init_cost.  */
static void
spu_destroy_cost_data (void *data)
{
  free (data);
}
6723 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6724 after applying N number of iterations. This routine does not determine
6725 how may iterations are required to reach desired alignment. */
6728 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6733 /* All other types are naturally aligned. */
6737 /* Return the appropriate mode for a named address pointer. */
6739 spu_addr_space_pointer_mode (addr_space_t addrspace
)
6743 case ADDR_SPACE_GENERIC
:
6752 /* Return the appropriate mode for a named address address. */
6754 spu_addr_space_address_mode (addr_space_t addrspace
)
6758 case ADDR_SPACE_GENERIC
:
6767 /* Determine if one named address space is a subset of another. */
6770 spu_addr_space_subset_p (addr_space_t subset
, addr_space_t superset
)
6772 gcc_assert (subset
== ADDR_SPACE_GENERIC
|| subset
== ADDR_SPACE_EA
);
6773 gcc_assert (superset
== ADDR_SPACE_GENERIC
|| superset
== ADDR_SPACE_EA
);
6775 if (subset
== superset
)
6778 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6779 being subsets but instead as disjoint address spaces. */
6780 else if (!TARGET_ADDRESS_SPACE_CONVERSION
)
6784 return (subset
== ADDR_SPACE_GENERIC
&& superset
== ADDR_SPACE_EA
);
6787 /* Convert from one address space to another. */
6789 spu_addr_space_convert (rtx op
, tree from_type
, tree to_type
)
6791 addr_space_t from_as
= TYPE_ADDR_SPACE (TREE_TYPE (from_type
));
6792 addr_space_t to_as
= TYPE_ADDR_SPACE (TREE_TYPE (to_type
));
6794 gcc_assert (from_as
== ADDR_SPACE_GENERIC
|| from_as
== ADDR_SPACE_EA
);
6795 gcc_assert (to_as
== ADDR_SPACE_GENERIC
|| to_as
== ADDR_SPACE_EA
);
6797 if (to_as
== ADDR_SPACE_GENERIC
&& from_as
== ADDR_SPACE_EA
)
6801 ls
= gen_const_mem (DImode
,
6802 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6803 set_mem_align (ls
, 128);
6805 result
= gen_reg_rtx (Pmode
);
6806 ls
= force_reg (Pmode
, convert_modes (Pmode
, DImode
, ls
, 1));
6807 op
= force_reg (Pmode
, convert_modes (Pmode
, EAmode
, op
, 1));
6808 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6809 ls
, const0_rtx
, Pmode
, 1);
6811 emit_insn (gen_subsi3 (result
, op
, ls
));
6816 else if (to_as
== ADDR_SPACE_EA
&& from_as
== ADDR_SPACE_GENERIC
)
6820 ls
= gen_const_mem (DImode
,
6821 gen_rtx_SYMBOL_REF (Pmode
, "__ea_local_store"));
6822 set_mem_align (ls
, 128);
6824 result
= gen_reg_rtx (EAmode
);
6825 ls
= force_reg (EAmode
, convert_modes (EAmode
, DImode
, ls
, 1));
6826 op
= force_reg (Pmode
, op
);
6827 ls
= emit_conditional_move (ls
, NE
, op
, const0_rtx
, Pmode
,
6828 ls
, const0_rtx
, EAmode
, 1);
6829 op
= force_reg (EAmode
, convert_modes (EAmode
, Pmode
, op
, 1));
6831 if (EAmode
== SImode
)
6832 emit_insn (gen_addsi3 (result
, op
, ls
));
6834 emit_insn (gen_adddi3 (result
, op
, ls
));
6844 /* Count the total number of instructions in each pipe and return the
6845 maximum, which is used as the Minimum Iteration Interval (MII)
6846 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6847 -2 are instructions that can go in pipe0 or pipe1. */
6849 spu_sms_res_mii (struct ddg
*g
)
6852 unsigned t
[4] = {0, 0, 0, 0};
6854 for (i
= 0; i
< g
->num_nodes
; i
++)
6856 rtx_insn
*insn
= g
->nodes
[i
].insn
;
6857 int p
= get_pipe (insn
) + 2;
6859 gcc_assert (p
>= 0);
6863 if (dump_file
&& INSN_P (insn
))
6864 fprintf (dump_file
, "i%d %s %d %d\n",
6866 insn_data
[INSN_CODE(insn
)].name
,
6870 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
6872 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
6877 spu_init_expanders (void)
6882 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6883 frame_pointer_needed is true. We don't know that until we're
6884 expanding the prologue. */
6885 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6887 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6888 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6889 to be treated as aligned, so generate them here. */
6890 r0
= gen_reg_rtx (SImode
);
6891 r1
= gen_reg_rtx (SImode
);
6892 mark_reg_pointer (r0
, 128);
6893 mark_reg_pointer (r1
, 128);
6894 gcc_assert (REGNO (r0
) == LAST_VIRTUAL_REGISTER
+ 1
6895 && REGNO (r1
) == LAST_VIRTUAL_REGISTER
+ 2);
6900 spu_libgcc_cmp_return_mode (void)
6903 /* For SPU word mode is TI mode so it is better to use SImode
6904 for compare returns. */
6909 spu_libgcc_shift_count_mode (void)
6911 /* For SPU word mode is TI mode so it is better to use SImode
6912 for shift counts. */
6916 /* Implement targetm.section_type_flags. */
6918 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6920 /* .toe needs to have type @nobits. */
6921 if (strcmp (name
, ".toe") == 0)
6923 /* Don't load _ea into the current address space. */
6924 if (strcmp (name
, "._ea") == 0)
6925 return SECTION_WRITE
| SECTION_DEBUG
;
6926 return default_section_type_flags (decl
, name
, reloc
);
6929 /* Implement targetm.select_section. */
6931 spu_select_section (tree decl
, int reloc
, unsigned HOST_WIDE_INT align
)
6933 /* Variables and constants defined in the __ea address space
6934 go into a special section named "._ea". */
6935 if (TREE_TYPE (decl
) != error_mark_node
6936 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) == ADDR_SPACE_EA
)
6938 /* We might get called with string constants, but get_named_section
6939 doesn't like them as they are not DECLs. Also, we need to set
6940 flags in that case. */
6942 return get_section ("._ea", SECTION_WRITE
| SECTION_DEBUG
, NULL
);
6944 return get_named_section (decl
, "._ea", reloc
);
6947 return default_elf_select_section (decl
, reloc
, align
);
6950 /* Implement targetm.unique_section. */
6952 spu_unique_section (tree decl
, int reloc
)
6954 /* We don't support unique section names in the __ea address
6956 if (TREE_TYPE (decl
) != error_mark_node
6957 && TYPE_ADDR_SPACE (TREE_TYPE (decl
)) != 0)
6960 default_unique_section (decl
, reloc
);
6963 /* Generate a constant or register which contains 2^SCALE. We assume
6964 the result is valid for MODE. Currently, MODE must be V4SFmode and
6965 SCALE must be SImode. */
6967 spu_gen_exp2 (machine_mode mode
, rtx scale
)
6969 gcc_assert (mode
== V4SFmode
);
6970 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6971 if (GET_CODE (scale
) != CONST_INT
)
6973 /* unsigned int exp = (127 + scale) << 23;
6974 __vector float m = (__vector float) spu_splats (exp); */
6975 rtx reg
= force_reg (SImode
, scale
);
6976 rtx exp
= gen_reg_rtx (SImode
);
6977 rtx mul
= gen_reg_rtx (mode
);
6978 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6979 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
6980 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
6985 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6986 unsigned char arr
[16];
6987 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
6988 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6989 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6990 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
6991 return array_to_constant (mode
, arr
);
6995 /* After reload, just change the convert into a move instruction
6996 or a dead instruction. */
6998 spu_split_convert (rtx ops
[])
7000 if (REGNO (ops
[0]) == REGNO (ops
[1]))
7001 emit_note (NOTE_INSN_DELETED
);
7004 /* Use TImode always as this might help hard reg copyprop. */
7005 rtx op0
= gen_rtx_REG (TImode
, REGNO (ops
[0]));
7006 rtx op1
= gen_rtx_REG (TImode
, REGNO (ops
[1]));
7007 emit_insn (gen_move_insn (op0
, op1
));
7012 spu_function_profiler (FILE * file
, int labelno ATTRIBUTE_UNUSED
)
7014 fprintf (file
, "# profile\n");
7015 fprintf (file
, "brsl $75, _mcount\n");
7018 /* Implement targetm.ref_may_alias_errno. */
7020 spu_ref_may_alias_errno (ao_ref
*ref
)
7022 tree base
= ao_ref_base (ref
);
7024 /* With SPU newlib, errno is defined as something like
7026 The default implementation of this target macro does not
7027 recognize such expressions, so special-code for it here. */
7029 if (TREE_CODE (base
) == VAR_DECL
7030 && !TREE_STATIC (base
)
7031 && DECL_EXTERNAL (base
)
7032 && TREE_CODE (TREE_TYPE (base
)) == RECORD_TYPE
7033 && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base
)),
7034 "_impure_data") == 0
7035 /* _errno is the first member of _impure_data. */
7036 && ref
->offset
== 0)
7039 return default_ref_may_alias_errno (ref
);
7042 /* Output thunk to FILE that implements a C++ virtual function call (with
7043 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
7044 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
7045 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
7046 relative to the resulting this pointer. */
7049 spu_output_mi_thunk (FILE *file
, tree thunk ATTRIBUTE_UNUSED
,
7050 HOST_WIDE_INT delta
, HOST_WIDE_INT vcall_offset
,
7055 /* Make sure unwind info is emitted for the thunk if needed. */
7056 final_start_function (emit_barrier (), file
, 1);
7058 /* Operand 0 is the target function. */
7059 op
[0] = XEXP (DECL_RTL (function
), 0);
7061 /* Operand 1 is the 'this' pointer. */
7062 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function
)), function
))
7063 op
[1] = gen_rtx_REG (Pmode
, FIRST_ARG_REGNUM
+ 1);
7065 op
[1] = gen_rtx_REG (Pmode
, FIRST_ARG_REGNUM
);
7067 /* Operands 2/3 are the low/high halfwords of delta. */
7068 op
[2] = GEN_INT (trunc_int_for_mode (delta
, HImode
));
7069 op
[3] = GEN_INT (trunc_int_for_mode (delta
>> 16, HImode
));
7071 /* Operands 4/5 are the low/high halfwords of vcall_offset. */
7072 op
[4] = GEN_INT (trunc_int_for_mode (vcall_offset
, HImode
));
7073 op
[5] = GEN_INT (trunc_int_for_mode (vcall_offset
>> 16, HImode
));
7075 /* Operands 6/7 are temporary registers. */
7076 op
[6] = gen_rtx_REG (Pmode
, 79);
7077 op
[7] = gen_rtx_REG (Pmode
, 78);
7079 /* Add DELTA to this pointer. */
7082 if (delta
>= -0x200 && delta
< 0x200)
7083 output_asm_insn ("ai\t%1,%1,%2", op
);
7084 else if (delta
>= -0x8000 && delta
< 0x8000)
7086 output_asm_insn ("il\t%6,%2", op
);
7087 output_asm_insn ("a\t%1,%1,%6", op
);
7091 output_asm_insn ("ilhu\t%6,%3", op
);
7092 output_asm_insn ("iohl\t%6,%2", op
);
7093 output_asm_insn ("a\t%1,%1,%6", op
);
7097 /* Perform vcall adjustment. */
7100 output_asm_insn ("lqd\t%7,0(%1)", op
);
7101 output_asm_insn ("rotqby\t%7,%7,%1", op
);
7103 if (vcall_offset
>= -0x200 && vcall_offset
< 0x200)
7104 output_asm_insn ("ai\t%7,%7,%4", op
);
7105 else if (vcall_offset
>= -0x8000 && vcall_offset
< 0x8000)
7107 output_asm_insn ("il\t%6,%4", op
);
7108 output_asm_insn ("a\t%7,%7,%6", op
);
7112 output_asm_insn ("ilhu\t%6,%5", op
);
7113 output_asm_insn ("iohl\t%6,%4", op
);
7114 output_asm_insn ("a\t%7,%7,%6", op
);
7117 output_asm_insn ("lqd\t%6,0(%7)", op
);
7118 output_asm_insn ("rotqby\t%6,%6,%7", op
);
7119 output_asm_insn ("a\t%1,%1,%6", op
);
7122 /* Jump to target. */
7123 output_asm_insn ("br\t%0", op
);
7125 final_end_function ();
7128 /* Canonicalize a comparison from one we don't have to one we do have. */
7130 spu_canonicalize_comparison (int *code
, rtx
*op0
, rtx
*op1
,
7131 bool op0_preserve_value
)
7133 if (!op0_preserve_value
7134 && (*code
== LE
|| *code
== LT
|| *code
== LEU
|| *code
== LTU
))
7139 *code
= (int)swap_condition ((enum rtx_code
)*code
);
7143 /* Table of machine attributes. */
7144 static const struct attribute_spec spu_attribute_table
[] =
7146 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
7147 affects_type_identity } */
7148 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
,
7150 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
,
7152 { NULL
, 0, 0, false, false, false, NULL
, false }
/*  TARGET overrides.  */

/* Named address space (__ea) hooks.  */
#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

/* Builtins.  */
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0

/* Scheduler hooks.  */
#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

/* Argument passing and varargs.  */
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

/* Vectorizer hooks.  */
#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_INIT_COST
#define TARGET_VECTORIZE_INIT_COST spu_init_cost

#undef TARGET_VECTORIZE_ADD_STMT_COST
#define TARGET_VECTORIZE_ADD_STMT_COST spu_add_stmt_cost

#undef TARGET_VECTORIZE_FINISH_COST
#define TARGET_VECTORIZE_FINISH_COST spu_finish_cost

#undef TARGET_VECTORIZE_DESTROY_COST_DATA
#define TARGET_VECTORIZE_DESTROY_COST_DATA spu_destroy_cost_data

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

/* Section handling.  */
#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN spu_warn_func_return

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true

#undef TARGET_CANONICALIZE_COMPARISON
#define TARGET_CANONICALIZE_COMPARISON spu_canonicalize_comparison

#undef TARGET_CAN_USE_DOLOOP_P
#define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
7361 struct gcc_target targetm
= TARGET_INITIALIZER
;