/* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
53 #include "tm-constrs.h"
59 /* Builtin types, data and prototypes. */
61 enum spu_builtin_type_index
63 SPU_BTI_END_OF_PARAMS
,
65 /* We create new type nodes for these. */
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
80 /* These all correspond to intSI_type_node */
94 /* These correspond to the standard types */
114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
125 static GTY(()) tree spu_builtin_types
[SPU_BTI_MAX
];
127 struct spu_builtin_range
132 static struct spu_builtin_range spu_builtin_range
[] = {
133 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
134 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
136 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
137 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
139 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
140 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
141 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
148 /* Target specific attribute specifications. */
/* NOTE(review): one flag byte per hard register.  Presumably records which
   registers have been allocated/used so the prologue/epilogue code can decide
   what to save -- confirm against its writers elsewhere in this file.  */
149 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
151 /* Prototypes and external defs. */
152 static void spu_init_builtins (void);
153 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode
);
154 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode
);
155 static bool spu_legitimate_address_p (enum machine_mode
, rtx
, bool);
156 static rtx
adjust_operand (rtx op
, HOST_WIDE_INT
* start
);
157 static rtx
get_pic_reg (void);
158 static int need_to_save_reg (int regno
, int saving
);
159 static rtx
frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
);
160 static rtx
frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
);
161 static rtx
frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
,
163 static void emit_nop_for_insn (rtx insn
);
164 static bool insn_clobbers_hbr (rtx insn
);
165 static void spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
166 int distance
, sbitmap blocks
);
167 static rtx
spu_emit_vector_compare (enum rtx_code rcode
, rtx op0
, rtx op1
,
168 enum machine_mode dmode
);
169 static rtx
get_branch_target (rtx branch
);
170 static void spu_machine_dependent_reorg (void);
171 static int spu_sched_issue_rate (void);
172 static int spu_sched_variable_issue (FILE * dump
, int verbose
, rtx insn
,
174 static int get_pipe (rtx insn
);
175 static int spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
);
176 static void spu_sched_init_global (FILE *, int, int);
177 static void spu_sched_init (FILE *, int, int);
178 static int spu_sched_reorder (FILE *, int, rtx
*, int *, int);
179 static tree
spu_handle_fndecl_attribute (tree
* node
, tree name
, tree args
,
181 unsigned char *no_add_attrs
);
182 static tree
spu_handle_vector_attribute (tree
* node
, tree name
, tree args
,
184 unsigned char *no_add_attrs
);
185 static int spu_naked_function_p (tree func
);
186 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
187 const_tree type
, unsigned char named
);
188 static tree
spu_build_builtin_va_list (void);
189 static void spu_va_start (tree
, rtx
);
190 static tree
spu_gimplify_va_arg_expr (tree valist
, tree type
,
191 gimple_seq
* pre_p
, gimple_seq
* post_p
);
192 static int store_with_one_insn_p (rtx mem
);
193 static int mem_is_padded_component_ref (rtx x
);
194 static int reg_aligned_for_addr (rtx x
);
195 static bool spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
);
196 static void spu_asm_globalize_label (FILE * file
, const char *name
);
197 static unsigned char spu_rtx_costs (rtx x
, int code
, int outer_code
,
198 int *total
, bool speed
);
199 static unsigned char spu_function_ok_for_sibcall (tree decl
, tree exp
);
200 static void spu_init_libfuncs (void);
201 static bool spu_return_in_memory (const_tree type
, const_tree fntype
);
202 static void fix_range (const char *);
203 static void spu_encode_section_info (tree
, rtx
, int);
204 static rtx
spu_legitimize_address (rtx
, rtx
, enum machine_mode
);
205 static tree
spu_builtin_mul_widen_even (tree
);
206 static tree
spu_builtin_mul_widen_odd (tree
);
207 static tree
spu_builtin_mask_for_load (void);
208 static int spu_builtin_vectorization_cost (bool);
209 static bool spu_vector_alignment_reachable (const_tree
, bool);
210 static tree
spu_builtin_vec_perm (tree
, tree
*);
211 static int spu_sms_res_mii (struct ddg
*g
);
212 static void asm_file_start (void);
213 static unsigned int spu_section_type_flags (tree
, const char *, int);
214 static rtx
spu_expand_load (rtx
, rtx
, rtx
, int);
/* Printable register names indexed by hard register number; used below by
   the operand/address printing routines (e.g. print_operand_address).  */
216 extern const char *reg_names
[];
218 /* Which instruction set architecture to use. */
220 /* Which cpu are we tuning for. */
223 /* The hardware requires 8 insns between a hint and the branch it
224 effects. This variable describes how many rtl instructions the
225 compiler needs to see before inserting a hint, and then the compiler
226 will insert enough nops to make it at least 8 insns. The default is
227 for the compiler to allow up to 2 nops be emitted. The nops are
228 inserted in pairs, so we round down. */
229 int spu_hint_dist
= (8*4) - (2*4);  /* Default: 8 insns minus 2 nop slots, in bytes (4 bytes/insn).  */
/* Determines whether we run variable tracking in machine dependent reorg.  */
233 static int spu_flag_var_tracking
;
248 IC_POOL
, /* constant pool */
249 IC_IL1
, /* one il* instruction */
250 IC_IL2
, /* both ilhu and iohl instructions */
251 IC_IL1s
, /* one il* instruction */
252 IC_IL2s
, /* both ilhu and iohl instructions */
253 IC_FSMBI
, /* the fsmbi instruction */
254 IC_CPAT
, /* one of the c*d instructions */
255 IC_FSMBI2
/* fsmbi plus 1 other instruction */
258 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
259 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
260 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
261 static enum immediate_class
classify_immediate (rtx op
,
262 enum machine_mode mode
);
264 static enum machine_mode
spu_unwind_word_mode (void);
266 static enum machine_mode
267 spu_libgcc_cmp_return_mode (void);
269 static enum machine_mode
270 spu_libgcc_shift_count_mode (void);
272 /* Table of machine attributes. */
273 static const struct attribute_spec spu_attribute_table
[] =
275 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
276 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
},
277 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
},
278 { NULL
, 0, 0, false, false, false, NULL
}
281 /* TARGET overrides. */
283 #undef TARGET_INIT_BUILTINS
284 #define TARGET_INIT_BUILTINS spu_init_builtins
286 #undef TARGET_EXPAND_BUILTIN
287 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
289 #undef TARGET_UNWIND_WORD_MODE
290 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
292 #undef TARGET_LEGITIMIZE_ADDRESS
293 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
/* The .8byte directive doesn't seem to work well for a 32 bit architecture.  */
297 #undef TARGET_ASM_UNALIGNED_DI_OP
298 #define TARGET_ASM_UNALIGNED_DI_OP NULL
300 #undef TARGET_RTX_COSTS
301 #define TARGET_RTX_COSTS spu_rtx_costs
303 #undef TARGET_ADDRESS_COST
304 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
306 #undef TARGET_SCHED_ISSUE_RATE
307 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
309 #undef TARGET_SCHED_INIT_GLOBAL
310 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
312 #undef TARGET_SCHED_INIT
313 #define TARGET_SCHED_INIT spu_sched_init
315 #undef TARGET_SCHED_VARIABLE_ISSUE
316 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
318 #undef TARGET_SCHED_REORDER
319 #define TARGET_SCHED_REORDER spu_sched_reorder
321 #undef TARGET_SCHED_REORDER2
322 #define TARGET_SCHED_REORDER2 spu_sched_reorder
324 #undef TARGET_SCHED_ADJUST_COST
325 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
327 #undef TARGET_ATTRIBUTE_TABLE
328 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
330 #undef TARGET_ASM_INTEGER
331 #define TARGET_ASM_INTEGER spu_assemble_integer
333 #undef TARGET_SCALAR_MODE_SUPPORTED_P
334 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
336 #undef TARGET_VECTOR_MODE_SUPPORTED_P
337 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
339 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
340 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
342 #undef TARGET_ASM_GLOBALIZE_LABEL
343 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
345 #undef TARGET_PASS_BY_REFERENCE
346 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
348 #undef TARGET_MUST_PASS_IN_STACK
349 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
351 #undef TARGET_BUILD_BUILTIN_VA_LIST
352 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
354 #undef TARGET_EXPAND_BUILTIN_VA_START
355 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
357 #undef TARGET_SETUP_INCOMING_VARARGS
358 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
360 #undef TARGET_MACHINE_DEPENDENT_REORG
361 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
363 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
364 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
366 #undef TARGET_DEFAULT_TARGET_FLAGS
367 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
369 #undef TARGET_INIT_LIBFUNCS
370 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
372 #undef TARGET_RETURN_IN_MEMORY
373 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
375 #undef TARGET_ENCODE_SECTION_INFO
376 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
378 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
379 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
381 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
382 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
384 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
385 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
387 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
388 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
390 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
391 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
393 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
394 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
396 #undef TARGET_LIBGCC_CMP_RETURN_MODE
397 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
399 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
400 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
402 #undef TARGET_SCHED_SMS_RES_MII
403 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
405 #undef TARGET_ASM_FILE_START
406 #define TARGET_ASM_FILE_START asm_file_start
408 #undef TARGET_SECTION_TYPE_FLAGS
409 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
411 #undef TARGET_LEGITIMATE_ADDRESS_P
412 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
/* The backend's target hook vector, built by TARGET_INITIALIZER from the
   TARGET_* macro overrides defined above.  */
414 struct gcc_target targetm
= TARGET_INITIALIZER
;
417 spu_optimization_options (int level ATTRIBUTE_UNUSED
, int size ATTRIBUTE_UNUSED
)
419 /* Override some of the default param values. With so many registers
420 larger values are better for these params. */
421 MAX_PENDING_LIST_LENGTH
= 128;
423 /* With so many registers this is better on by default. */
424 flag_rename_registers
= 1;
427 /* Sometimes certain combinations of command options do not make sense
428 on a particular target machine. You can define a macro
429 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
430 executed once just after all the command options have been parsed. */
432 spu_override_options (void)
434 /* Small loops will be unpeeled at -O3. For SPU it is more important
435 to keep code small by default. */
436 if (!flag_unroll_loops
&& !flag_peel_loops
437 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
438 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
) = 1;
440 flag_omit_frame_pointer
= 1;
442 /* Functions must be 8 byte aligned so we correctly handle dual issue */
443 if (align_functions
< 8)
446 spu_hint_dist
= 8*4 - spu_max_nops
*4;
447 if (spu_hint_dist
< 0)
450 if (spu_fixed_range_string
)
451 fix_range (spu_fixed_range_string
);
453 /* Determine processor architectural level. */
456 if (strcmp (&spu_arch_string
[0], "cell") == 0)
457 spu_arch
= PROCESSOR_CELL
;
458 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
459 spu_arch
= PROCESSOR_CELLEDP
;
461 error ("Unknown architecture '%s'", &spu_arch_string
[0]);
464 /* Determine processor to tune for. */
467 if (strcmp (&spu_tune_string
[0], "cell") == 0)
468 spu_tune
= PROCESSOR_CELL
;
469 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
470 spu_tune
= PROCESSOR_CELLEDP
;
472 error ("Unknown architecture '%s'", &spu_tune_string
[0]);
475 /* Change defaults according to the processor architecture. */
476 if (spu_arch
== PROCESSOR_CELLEDP
)
478 /* If no command line option has been otherwise specified, change
479 the default to -mno-safe-hints on celledp -- only the original
480 Cell/B.E. processors require this workaround. */
481 if (!(target_flags_explicit
& MASK_SAFE_HINTS
))
482 target_flags
&= ~MASK_SAFE_HINTS
;
485 REAL_MODE_FORMAT (SFmode
) = &spu_single_format
;
488 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
489 struct attribute_spec.handler. */
491 /* True if MODE is valid for the target. By "valid", we mean able to
492 be manipulated in non-trivial ways. In particular, this means all
493 the arithmetic is supported. */
495 spu_scalar_mode_supported_p (enum machine_mode mode
)
513 /* Similarly for vector modes. "Supported" here is less strict. At
514 least some operations are supported; need to check optabs or builtins
515 for further details. */
517 spu_vector_mode_supported_p (enum machine_mode mode
)
534 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
535 least significant bytes of the outer mode. This function returns
536 TRUE for the SUBREG's where this is correct. */
538 valid_subreg (rtx op
)
540 enum machine_mode om
= GET_MODE (op
);
541 enum machine_mode im
= GET_MODE (SUBREG_REG (op
));
542 return om
!= VOIDmode
&& im
!= VOIDmode
543 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
544 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4)
545 || (GET_MODE_SIZE (im
) >= 16 && GET_MODE_SIZE (om
) >= 16));
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
551 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
553 enum machine_mode mode
;
555 /* Strip any paradoxical SUBREG. */
556 if (GET_CODE (op
) == SUBREG
557 && (GET_MODE_BITSIZE (GET_MODE (op
))
558 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)))))
562 GET_MODE_BITSIZE (GET_MODE (op
)) -
563 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
564 op
= SUBREG_REG (op
);
566 /* If it is smaller than SI, assure a SUBREG */
567 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
571 *start
+= 32 - op_size
;
574 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
575 mode
= mode_for_size (op_size
, MODE_INT
, 0);
576 if (mode
!= GET_MODE (op
))
577 op
= gen_rtx_SUBREG (mode
, op
, 0);
582 spu_expand_extv (rtx ops
[], int unsignedp
)
584 rtx dst
= ops
[0], src
= ops
[1];
585 HOST_WIDE_INT width
= INTVAL (ops
[2]);
586 HOST_WIDE_INT start
= INTVAL (ops
[3]);
587 HOST_WIDE_INT align_mask
;
588 rtx s0
, s1
, mask
, r0
;
590 gcc_assert (REG_P (dst
) && GET_MODE (dst
) == TImode
);
594 /* First, determine if we need 1 TImode load or 2. We need only 1
595 if the bits being extracted do not cross the alignment boundary
596 as determined by the MEM and its address. */
598 align_mask
= -MEM_ALIGN (src
);
599 if ((start
& align_mask
) == ((start
+ width
- 1) & align_mask
))
601 /* Alignment is sufficient for 1 load. */
602 s0
= gen_reg_rtx (TImode
);
603 r0
= spu_expand_load (s0
, 0, src
, start
/ 8);
606 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
611 s0
= gen_reg_rtx (TImode
);
612 s1
= gen_reg_rtx (TImode
);
613 r0
= spu_expand_load (s0
, s1
, src
, start
/ 8);
616 gcc_assert (start
+ width
<= 128);
619 rtx r1
= gen_reg_rtx (SImode
);
620 mask
= gen_reg_rtx (TImode
);
621 emit_move_insn (mask
, GEN_INT (-1));
622 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
623 emit_insn (gen_rotqby_ti (s1
, s1
, r0
));
624 if (GET_CODE (r0
) == CONST_INT
)
625 r1
= GEN_INT (INTVAL (r0
) & 15);
627 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (15)));
628 emit_insn (gen_shlqby_ti (mask
, mask
, r1
));
629 emit_insn (gen_selb (s0
, s1
, s0
, mask
));
634 else if (GET_CODE (src
) == SUBREG
)
636 rtx r
= SUBREG_REG (src
);
637 gcc_assert (REG_P (r
) && SCALAR_INT_MODE_P (GET_MODE (r
)));
638 s0
= gen_reg_rtx (TImode
);
639 if (GET_MODE_SIZE (GET_MODE (r
)) < GET_MODE_SIZE (TImode
))
640 emit_insn (gen_rtx_SET (VOIDmode
, s0
, gen_rtx_ZERO_EXTEND (TImode
, r
)));
642 emit_move_insn (s0
, src
);
646 gcc_assert (REG_P (src
) && GET_MODE (src
) == TImode
);
647 s0
= gen_reg_rtx (TImode
);
648 emit_move_insn (s0
, src
);
651 /* Now s0 is TImode and contains the bits to extract at start. */
654 emit_insn (gen_rotlti3 (s0
, s0
, GEN_INT (start
)));
658 tree c
= build_int_cst (NULL_TREE
, 128 - width
);
659 s0
= expand_shift (RSHIFT_EXPR
, TImode
, s0
, c
, s0
, unsignedp
);
662 emit_move_insn (dst
, s0
);
666 spu_expand_insv (rtx ops
[])
668 HOST_WIDE_INT width
= INTVAL (ops
[1]);
669 HOST_WIDE_INT start
= INTVAL (ops
[2]);
670 HOST_WIDE_INT maskbits
;
671 enum machine_mode dst_mode
, src_mode
;
672 rtx dst
= ops
[0], src
= ops
[3];
673 int dst_size
, src_size
;
679 if (GET_CODE (ops
[0]) == MEM
)
680 dst
= gen_reg_rtx (TImode
);
682 dst
= adjust_operand (dst
, &start
);
683 dst_mode
= GET_MODE (dst
);
684 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
686 if (CONSTANT_P (src
))
688 enum machine_mode m
=
689 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
690 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
692 src
= adjust_operand (src
, 0);
693 src_mode
= GET_MODE (src
);
694 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
696 mask
= gen_reg_rtx (dst_mode
);
697 shift_reg
= gen_reg_rtx (dst_mode
);
698 shift
= dst_size
- start
- width
;
700 /* It's not safe to use subreg here because the compiler assumes
701 that the SUBREG_REG is right justified in the SUBREG. */
702 convert_move (shift_reg
, src
, 1);
709 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
712 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
715 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
727 maskbits
= (-1ll << (32 - width
- start
));
729 maskbits
+= (1ll << (32 - start
));
730 emit_move_insn (mask
, GEN_INT (maskbits
));
733 maskbits
= (-1ll << (64 - width
- start
));
735 maskbits
+= (1ll << (64 - start
));
736 emit_move_insn (mask
, GEN_INT (maskbits
));
740 unsigned char arr
[16];
742 memset (arr
, 0, sizeof (arr
));
743 arr
[i
] = 0xff >> (start
& 7);
744 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
746 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
747 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
753 if (GET_CODE (ops
[0]) == MEM
)
755 rtx low
= gen_reg_rtx (SImode
);
756 rtx rotl
= gen_reg_rtx (SImode
);
757 rtx mask0
= gen_reg_rtx (TImode
);
763 addr
= force_reg (Pmode
, XEXP (ops
[0], 0));
764 addr0
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
765 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
766 emit_insn (gen_negsi2 (rotl
, low
));
767 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
768 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
769 mem
= change_address (ops
[0], TImode
, addr0
);
770 set_mem_alias_set (mem
, 0);
771 emit_move_insn (dst
, mem
);
772 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
773 if (start
+ width
> MEM_ALIGN (ops
[0]))
775 rtx shl
= gen_reg_rtx (SImode
);
776 rtx mask1
= gen_reg_rtx (TImode
);
777 rtx dst1
= gen_reg_rtx (TImode
);
779 addr1
= plus_constant (addr
, 16);
780 addr1
= gen_rtx_AND (Pmode
, addr1
, GEN_INT (-16));
781 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
782 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
783 mem1
= change_address (ops
[0], TImode
, addr1
);
784 set_mem_alias_set (mem1
, 0);
785 emit_move_insn (dst1
, mem1
);
786 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
787 emit_move_insn (mem1
, dst1
);
789 emit_move_insn (mem
, dst
);
792 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
797 spu_expand_block_move (rtx ops
[])
799 HOST_WIDE_INT bytes
, align
, offset
;
800 rtx src
, dst
, sreg
, dreg
, target
;
802 if (GET_CODE (ops
[2]) != CONST_INT
803 || GET_CODE (ops
[3]) != CONST_INT
804 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
807 bytes
= INTVAL (ops
[2]);
808 align
= INTVAL (ops
[3]);
818 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
820 dst
= adjust_address (ops
[0], V16QImode
, offset
);
821 src
= adjust_address (ops
[1], V16QImode
, offset
);
822 emit_move_insn (dst
, src
);
827 unsigned char arr
[16] = { 0 };
828 for (i
= 0; i
< bytes
- offset
; i
++)
830 dst
= adjust_address (ops
[0], V16QImode
, offset
);
831 src
= adjust_address (ops
[1], V16QImode
, offset
);
832 mask
= gen_reg_rtx (V16QImode
);
833 sreg
= gen_reg_rtx (V16QImode
);
834 dreg
= gen_reg_rtx (V16QImode
);
835 target
= gen_reg_rtx (V16QImode
);
836 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
837 emit_move_insn (dreg
, dst
);
838 emit_move_insn (sreg
, src
);
839 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
840 emit_move_insn (dst
, target
);
848 { SPU_EQ
, SPU_GT
, SPU_GTU
};
850 int spu_comp_icode
[12][3] = {
851 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
852 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
853 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
854 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
855 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
856 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
857 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
858 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
859 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
860 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
861 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
862 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
871 spu_emit_branch_or_set (int is_set
, rtx cmp
, rtx operands
[])
873 int reverse_compare
= 0;
874 int reverse_test
= 0;
875 rtx compare_result
, eq_result
;
876 rtx comp_rtx
, eq_rtx
;
877 enum machine_mode comp_mode
;
878 enum machine_mode op_mode
;
879 enum spu_comp_code scode
, eq_code
;
880 enum insn_code ior_code
;
881 enum rtx_code code
= GET_CODE (cmp
);
882 rtx op0
= XEXP (cmp
, 0);
883 rtx op1
= XEXP (cmp
, 1);
887 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
888 and so on, to keep the constant in operand 1. */
889 if (GET_CODE (op1
) == CONST_INT
)
891 HOST_WIDE_INT val
= INTVAL (op1
) - 1;
892 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
917 op_mode
= GET_MODE (op0
);
923 if (HONOR_NANS (op_mode
))
938 if (HONOR_NANS (op_mode
))
1018 comp_mode
= op_mode
;
1022 comp_mode
= op_mode
;
1026 comp_mode
= op_mode
;
1030 comp_mode
= V4SImode
;
1034 comp_mode
= V2DImode
;
1041 if (GET_MODE (op1
) == DFmode
1042 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
1045 if (is_set
== 0 && op1
== const0_rtx
1046 && (GET_MODE (op0
) == SImode
1047 || GET_MODE (op0
) == HImode
) && scode
== SPU_EQ
)
1049 /* Don't need to set a register with the result when we are
1050 comparing against zero and branching. */
1051 reverse_test
= !reverse_test
;
1052 compare_result
= op0
;
1056 compare_result
= gen_reg_rtx (comp_mode
);
1058 if (reverse_compare
)
1065 if (spu_comp_icode
[index
][scode
] == 0)
1068 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
1070 op0
= force_reg (op_mode
, op0
);
1071 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
1073 op1
= force_reg (op_mode
, op1
);
1074 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
1078 emit_insn (comp_rtx
);
1082 eq_result
= gen_reg_rtx (comp_mode
);
1083 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
1088 ior_code
= ior_optab
->handlers
[(int)comp_mode
].insn_code
;
1089 gcc_assert (ior_code
!= CODE_FOR_nothing
);
1090 emit_insn (GEN_FCN (ior_code
)
1091 (compare_result
, compare_result
, eq_result
));
1100 /* We don't have branch on QI compare insns, so we convert the
1101 QI compare result to a HI result. */
1102 if (comp_mode
== QImode
)
1104 rtx old_res
= compare_result
;
1105 compare_result
= gen_reg_rtx (HImode
);
1107 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
1111 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
1113 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
1115 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
1116 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
1117 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
1120 else if (is_set
== 2)
1122 rtx target
= operands
[0];
1123 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
1124 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
1125 enum machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
1127 rtx op_t
= operands
[2];
1128 rtx op_f
= operands
[3];
1130 /* The result of the comparison can be SI, HI or QI mode. Create a
1131 mask based on that result. */
1132 if (target_size
> compare_size
)
1134 select_mask
= gen_reg_rtx (mode
);
1135 emit_insn (gen_extend_compare (select_mask
, compare_result
));
1137 else if (target_size
< compare_size
)
1139 gen_rtx_SUBREG (mode
, compare_result
,
1140 (compare_size
- target_size
) / BITS_PER_UNIT
);
1141 else if (comp_mode
!= mode
)
1142 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
1144 select_mask
= compare_result
;
1146 if (GET_MODE (target
) != GET_MODE (op_t
)
1147 || GET_MODE (target
) != GET_MODE (op_f
))
1151 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
1153 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
1157 rtx target
= operands
[0];
1159 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
1160 gen_rtx_NOT (comp_mode
, compare_result
)));
1161 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
1162 emit_insn (gen_extendhisi2 (target
, compare_result
));
1163 else if (GET_MODE (target
) == SImode
1164 && GET_MODE (compare_result
) == QImode
)
1165 emit_insn (gen_extend_compare (target
, compare_result
));
1167 emit_move_insn (target
, compare_result
);
1172 const_double_to_hwint (rtx x
)
1176 if (GET_MODE (x
) == SFmode
)
1178 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1179 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1181 else if (GET_MODE (x
) == DFmode
)
1184 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1185 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
1187 val
= (val
<< 32) | (l
[1] & 0xffffffff);
1195 hwint_to_const_double (enum machine_mode mode
, HOST_WIDE_INT v
)
1199 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1202 tv
[0] = (v
<< 32) >> 32;
1203 else if (mode
== DFmode
)
1205 tv
[1] = (v
<< 32) >> 32;
1208 real_from_target (&rv
, tv
, mode
);
1209 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
1213 print_operand_address (FILE * file
, register rtx addr
)
1218 if (GET_CODE (addr
) == AND
1219 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1220 && INTVAL (XEXP (addr
, 1)) == -16)
1221 addr
= XEXP (addr
, 0);
1223 switch (GET_CODE (addr
))
1226 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1230 reg
= XEXP (addr
, 0);
1231 offset
= XEXP (addr
, 1);
1232 if (GET_CODE (offset
) == REG
)
1234 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1235 reg_names
[REGNO (offset
)]);
1237 else if (GET_CODE (offset
) == CONST_INT
)
1239 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1240 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1250 output_addr_const (file
, addr
);
1260 print_operand (FILE * file
, rtx x
, int code
)
1262 enum machine_mode mode
= GET_MODE (x
);
1264 unsigned char arr
[16];
1265 int xcode
= GET_CODE (x
);
1267 if (GET_MODE (x
) == VOIDmode
)
1270 case 'L': /* 128 bits, signed */
1271 case 'm': /* 128 bits, signed */
1272 case 'T': /* 128 bits, signed */
1273 case 't': /* 128 bits, signed */
1276 case 'K': /* 64 bits, signed */
1277 case 'k': /* 64 bits, signed */
1278 case 'D': /* 64 bits, signed */
1279 case 'd': /* 64 bits, signed */
1282 case 'J': /* 32 bits, signed */
1283 case 'j': /* 32 bits, signed */
1284 case 's': /* 32 bits, signed */
1285 case 'S': /* 32 bits, signed */
1292 case 'j': /* 32 bits, signed */
1293 case 'k': /* 64 bits, signed */
1294 case 'm': /* 128 bits, signed */
1295 if (xcode
== CONST_INT
1296 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1298 gcc_assert (logical_immediate_p (x
, mode
));
1299 constant_to_array (mode
, x
, arr
);
1300 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1301 val
= trunc_int_for_mode (val
, SImode
);
1302 switch (which_logical_immediate (val
))
1307 fprintf (file
, "h");
1310 fprintf (file
, "b");
1320 case 'J': /* 32 bits, signed */
1321 case 'K': /* 64 bits, signed */
1322 case 'L': /* 128 bits, signed */
1323 if (xcode
== CONST_INT
1324 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1326 gcc_assert (logical_immediate_p (x
, mode
)
1327 || iohl_immediate_p (x
, mode
));
1328 constant_to_array (mode
, x
, arr
);
1329 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1330 val
= trunc_int_for_mode (val
, SImode
);
1331 switch (which_logical_immediate (val
))
1337 val
= trunc_int_for_mode (val
, HImode
);
1340 val
= trunc_int_for_mode (val
, QImode
);
1345 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1351 case 't': /* 128 bits, signed */
1352 case 'd': /* 64 bits, signed */
1353 case 's': /* 32 bits, signed */
1356 enum immediate_class c
= classify_immediate (x
, mode
);
1360 constant_to_array (mode
, x
, arr
);
1361 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1362 val
= trunc_int_for_mode (val
, SImode
);
1363 switch (which_immediate_load (val
))
1368 fprintf (file
, "a");
1371 fprintf (file
, "h");
1374 fprintf (file
, "hu");
1381 constant_to_array (mode
, x
, arr
);
1382 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1384 fprintf (file
, "b");
1386 fprintf (file
, "h");
1388 fprintf (file
, "w");
1390 fprintf (file
, "d");
1393 if (xcode
== CONST_VECTOR
)
1395 x
= CONST_VECTOR_ELT (x
, 0);
1396 xcode
= GET_CODE (x
);
1398 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1399 fprintf (file
, "a");
1400 else if (xcode
== HIGH
)
1401 fprintf (file
, "hu");
1415 case 'T': /* 128 bits, signed */
1416 case 'D': /* 64 bits, signed */
1417 case 'S': /* 32 bits, signed */
1420 enum immediate_class c
= classify_immediate (x
, mode
);
1424 constant_to_array (mode
, x
, arr
);
1425 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1426 val
= trunc_int_for_mode (val
, SImode
);
1427 switch (which_immediate_load (val
))
1434 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1439 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1442 constant_to_array (mode
, x
, arr
);
1444 for (i
= 0; i
< 16; i
++)
1449 print_operand (file
, GEN_INT (val
), 0);
1452 constant_to_array (mode
, x
, arr
);
1453 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1454 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1459 if (GET_CODE (x
) == CONST_VECTOR
)
1460 x
= CONST_VECTOR_ELT (x
, 0);
1461 output_addr_const (file
, x
);
1463 fprintf (file
, "@h");
1477 if (xcode
== CONST_INT
)
1479 /* Only 4 least significant bits are relevant for generate
1480 control word instructions. */
1481 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1486 case 'M': /* print code for c*d */
1487 if (GET_CODE (x
) == CONST_INT
)
1491 fprintf (file
, "b");
1494 fprintf (file
, "h");
1497 fprintf (file
, "w");
1500 fprintf (file
, "d");
1509 case 'N': /* Negate the operand */
1510 if (xcode
== CONST_INT
)
1511 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1512 else if (xcode
== CONST_VECTOR
)
1513 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1514 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1517 case 'I': /* enable/disable interrupts */
1518 if (xcode
== CONST_INT
)
1519 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1522 case 'b': /* branch modifiers */
1524 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1525 else if (COMPARISON_P (x
))
1526 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1529 case 'i': /* indirect call */
1532 if (GET_CODE (XEXP (x
, 0)) == REG
)
1533 /* Used in indirect function calls. */
1534 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1536 output_address (XEXP (x
, 0));
1540 case 'p': /* load/store */
1544 xcode
= GET_CODE (x
);
1549 xcode
= GET_CODE (x
);
1552 fprintf (file
, "d");
1553 else if (xcode
== CONST_INT
)
1554 fprintf (file
, "a");
1555 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1556 fprintf (file
, "r");
1557 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1559 if (GET_CODE (XEXP (x
, 1)) == REG
)
1560 fprintf (file
, "x");
1562 fprintf (file
, "d");
1567 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1569 output_addr_const (file
, GEN_INT (val
));
1573 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1575 output_addr_const (file
, GEN_INT (val
));
1579 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1581 output_addr_const (file
, GEN_INT (val
));
1585 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1586 val
= (val
>> 3) & 0x1f;
1587 output_addr_const (file
, GEN_INT (val
));
1591 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1594 output_addr_const (file
, GEN_INT (val
));
1598 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1601 output_addr_const (file
, GEN_INT (val
));
1605 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1608 output_addr_const (file
, GEN_INT (val
));
1612 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1613 val
= -(val
& -8ll);
1614 val
= (val
>> 3) & 0x1f;
1615 output_addr_const (file
, GEN_INT (val
));
1620 constant_to_array (mode
, x
, arr
);
1621 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1622 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1627 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1628 else if (xcode
== MEM
)
1629 output_address (XEXP (x
, 0));
1630 else if (xcode
== CONST_VECTOR
)
1631 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1633 output_addr_const (file
, x
);
1640 output_operand_lossage ("invalid %%xn code");
1645 extern char call_used_regs
[];
1647 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1648 caller saved register. For leaf functions it is more efficient to
1649 use a volatile register because we won't need to save and restore the
1650 pic register. This routine is only valid after register allocation
1651 is completed, so we can pick an unused register. */
1655 rtx pic_reg
= pic_offset_table_rtx
;
1656 if (!reload_completed
&& !reload_in_progress
)
1661 /* Split constant addresses to handle cases that are too large.
1662 Add in the pic register when in PIC mode.
1663 Split immediates that require more than 1 instruction. */
1665 spu_split_immediate (rtx
* ops
)
1667 enum machine_mode mode
= GET_MODE (ops
[0]);
1668 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1674 unsigned char arrhi
[16];
1675 unsigned char arrlo
[16];
1676 rtx to
, temp
, hi
, lo
;
1678 enum machine_mode imode
= mode
;
1679 /* We need to do reals as ints because the constant used in the
1680 IOR might not be a legitimate real constant. */
1681 imode
= int_mode_for_mode (mode
);
1682 constant_to_array (mode
, ops
[1], arrhi
);
1684 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1687 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1688 for (i
= 0; i
< 16; i
+= 4)
1690 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1691 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1692 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1693 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1695 hi
= array_to_constant (imode
, arrhi
);
1696 lo
= array_to_constant (imode
, arrlo
);
1697 emit_move_insn (temp
, hi
);
1698 emit_insn (gen_rtx_SET
1699 (VOIDmode
, to
, gen_rtx_IOR (imode
, temp
, lo
)));
1704 unsigned char arr_fsmbi
[16];
1705 unsigned char arr_andbi
[16];
1706 rtx to
, reg_fsmbi
, reg_and
;
1708 enum machine_mode imode
= mode
;
1709 /* We need to do reals as ints because the constant used in the
1710 * AND might not be a legitimate real constant. */
1711 imode
= int_mode_for_mode (mode
);
1712 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1714 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1717 for (i
= 0; i
< 16; i
++)
1718 if (arr_fsmbi
[i
] != 0)
1720 arr_andbi
[0] = arr_fsmbi
[i
];
1721 arr_fsmbi
[i
] = 0xff;
1723 for (i
= 1; i
< 16; i
++)
1724 arr_andbi
[i
] = arr_andbi
[0];
1725 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1726 reg_and
= array_to_constant (imode
, arr_andbi
);
1727 emit_move_insn (to
, reg_fsmbi
);
1728 emit_insn (gen_rtx_SET
1729 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1733 if (reload_in_progress
|| reload_completed
)
1735 rtx mem
= force_const_mem (mode
, ops
[1]);
1736 if (TARGET_LARGE_MEM
)
1738 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1739 emit_move_insn (addr
, XEXP (mem
, 0));
1740 mem
= replace_equiv_address (mem
, addr
);
1742 emit_move_insn (ops
[0], mem
);
1748 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1752 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1753 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1756 emit_insn (gen_pic (ops
[0], ops
[1]));
1759 rtx pic_reg
= get_pic_reg ();
1760 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1761 crtl
->uses_pic_offset_table
= 1;
1763 return flag_pic
|| c
== IC_IL2s
;
1774 /* SAVING is TRUE when we are generating the actual load and store
1775 instructions for REGNO. When determining the size of the stack
1776 needed for saving register we must allocate enough space for the
1777 worst case, because we don't always have the information early enough
1778 to not allocate it. But we can at least eliminate the actual loads
1779 and stores during the prologue/epilogue. */
1781 need_to_save_reg (int regno
, int saving
)
1783 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1786 && regno
== PIC_OFFSET_TABLE_REGNUM
1787 && (!saving
|| crtl
->uses_pic_offset_table
)
1789 || !current_function_is_leaf
|| df_regs_ever_live_p (LAST_ARG_REGNUM
)))
1794 /* This function is only correct starting with local register
1797 spu_saved_regs_size (void)
1799 int reg_save_size
= 0;
1802 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1803 if (need_to_save_reg (regno
, 0))
1804 reg_save_size
+= 0x10;
1805 return reg_save_size
;
1809 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1811 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1813 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1814 return emit_insn (gen_movv4si (mem
, reg
));
1818 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1820 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1822 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1823 return emit_insn (gen_movv4si (reg
, mem
));
1826 /* This happens after reload, so we need to expand it. */
1828 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1831 if (satisfies_constraint_K (GEN_INT (imm
)))
1833 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1837 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1838 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1839 if (REGNO (src
) == REGNO (scratch
))
1845 /* Return nonzero if this function is known to have a null epilogue. */
1848 direct_return (void)
1850 if (reload_completed
)
1852 if (cfun
->static_chain_decl
== 0
1853 && (spu_saved_regs_size ()
1855 + crtl
->outgoing_args_size
1856 + crtl
->args
.pretend_args_size
== 0)
1857 && current_function_is_leaf
)
1864 The stack frame looks like this:
1868 AP -> +-------------+
1871 prev SP | back chain |
1874 | reg save | crtl->args.pretend_args_size bytes
1877 | saved regs | spu_saved_regs_size() bytes
1878 FP -> +-------------+
1880 | vars | get_frame_size() bytes
1881 HFP -> +-------------+
1884 | args | crtl->outgoing_args_size bytes
1890 SP -> +-------------+
1894 spu_expand_prologue (void)
1896 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1897 HOST_WIDE_INT total_size
;
1898 HOST_WIDE_INT saved_regs_size
;
1899 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1900 rtx scratch_reg_0
, scratch_reg_1
;
1903 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1904 the "toplevel" insn chain. */
1905 emit_note (NOTE_INSN_DELETED
);
1907 if (flag_pic
&& optimize
== 0)
1908 crtl
->uses_pic_offset_table
= 1;
1910 if (spu_naked_function_p (current_function_decl
))
1913 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1914 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1916 saved_regs_size
= spu_saved_regs_size ();
1917 total_size
= size
+ saved_regs_size
1918 + crtl
->outgoing_args_size
1919 + crtl
->args
.pretend_args_size
;
1921 if (!current_function_is_leaf
1922 || cfun
->calls_alloca
|| total_size
> 0)
1923 total_size
+= STACK_POINTER_OFFSET
;
1925 /* Save this first because code after this might use the link
1926 register as a scratch register. */
1927 if (!current_function_is_leaf
)
1929 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1930 RTX_FRAME_RELATED_P (insn
) = 1;
1935 offset
= -crtl
->args
.pretend_args_size
;
1936 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1937 if (need_to_save_reg (regno
, 1))
1940 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1941 RTX_FRAME_RELATED_P (insn
) = 1;
1945 if (flag_pic
&& crtl
->uses_pic_offset_table
)
1947 rtx pic_reg
= get_pic_reg ();
1948 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1949 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1954 if (flag_stack_check
)
1956 /* We compare against total_size-1 because
1957 ($sp >= total_size) <=> ($sp > total_size-1) */
1958 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1959 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1960 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1961 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1963 emit_move_insn (scratch_v4si
, size_v4si
);
1964 size_v4si
= scratch_v4si
;
1966 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1967 emit_insn (gen_vec_extractv4si
1968 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1969 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1972 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1973 the value of the previous $sp because we save it as the back
1975 if (total_size
<= 2000)
1977 /* In this case we save the back chain first. */
1978 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1980 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1984 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1986 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1988 RTX_FRAME_RELATED_P (insn
) = 1;
1989 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1990 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1992 if (total_size
> 2000)
1994 /* Save the back chain ptr */
1995 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1998 if (frame_pointer_needed
)
2000 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
2001 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
2002 + crtl
->outgoing_args_size
;
2003 /* Set the new frame_pointer */
2004 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
2005 RTX_FRAME_RELATED_P (insn
) = 1;
2006 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
2007 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
2008 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
2012 emit_note (NOTE_INSN_DELETED
);
2016 spu_expand_epilogue (bool sibcall_p
)
2018 int size
= get_frame_size (), offset
, regno
;
2019 HOST_WIDE_INT saved_regs_size
, total_size
;
2020 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
2021 rtx jump
, scratch_reg_0
;
2023 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2024 the "toplevel" insn chain. */
2025 emit_note (NOTE_INSN_DELETED
);
2027 if (spu_naked_function_p (current_function_decl
))
2030 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
2032 saved_regs_size
= spu_saved_regs_size ();
2033 total_size
= size
+ saved_regs_size
2034 + crtl
->outgoing_args_size
2035 + crtl
->args
.pretend_args_size
;
2037 if (!current_function_is_leaf
2038 || cfun
->calls_alloca
|| total_size
> 0)
2039 total_size
+= STACK_POINTER_OFFSET
;
2043 if (cfun
->calls_alloca
)
2044 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
2046 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
2049 if (saved_regs_size
> 0)
2051 offset
= -crtl
->args
.pretend_args_size
;
2052 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
2053 if (need_to_save_reg (regno
, 1))
2056 frame_emit_load (regno
, sp_reg
, offset
);
2061 if (!current_function_is_leaf
)
2062 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
2066 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
2067 jump
= emit_jump_insn (gen__return ());
2068 emit_barrier_after (jump
);
2071 emit_note (NOTE_INSN_DELETED
);
2075 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
2079 /* This is inefficient because it ends up copying to a save-register
2080 which then gets saved even though $lr has already been saved. But
2081 it does generate better code for leaf functions and we don't need
2082 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2083 used for __builtin_return_address anyway, so maybe we don't care if
2084 it's inefficient. */
2085 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
2089 /* Given VAL, generate a constant appropriate for MODE.
2090 If MODE is a vector mode, every element will be VAL.
2091 For TImode, VAL will be zero extended to 128 bits. */
2093 spu_const (enum machine_mode mode
, HOST_WIDE_INT val
)
2099 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
2100 || GET_MODE_CLASS (mode
) == MODE_FLOAT
2101 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
2102 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
2104 if (GET_MODE_CLASS (mode
) == MODE_INT
)
2105 return immed_double_const (val
, 0, mode
);
2107 /* val is the bit representation of the float */
2108 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
2109 return hwint_to_const_double (mode
, val
);
2111 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
2112 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
2114 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
2116 units
= GET_MODE_NUNITS (mode
);
2118 v
= rtvec_alloc (units
);
2120 for (i
= 0; i
< units
; ++i
)
2121 RTVEC_ELT (v
, i
) = inner
;
2123 return gen_rtx_CONST_VECTOR (mode
, v
);
2126 /* Create a MODE vector constant from 4 ints. */
2128 spu_const_from_ints(enum machine_mode mode
, int a
, int b
, int c
, int d
)
2130 unsigned char arr
[16];
2131 arr
[0] = (a
>> 24) & 0xff;
2132 arr
[1] = (a
>> 16) & 0xff;
2133 arr
[2] = (a
>> 8) & 0xff;
2134 arr
[3] = (a
>> 0) & 0xff;
2135 arr
[4] = (b
>> 24) & 0xff;
2136 arr
[5] = (b
>> 16) & 0xff;
2137 arr
[6] = (b
>> 8) & 0xff;
2138 arr
[7] = (b
>> 0) & 0xff;
2139 arr
[8] = (c
>> 24) & 0xff;
2140 arr
[9] = (c
>> 16) & 0xff;
2141 arr
[10] = (c
>> 8) & 0xff;
2142 arr
[11] = (c
>> 0) & 0xff;
2143 arr
[12] = (d
>> 24) & 0xff;
2144 arr
[13] = (d
>> 16) & 0xff;
2145 arr
[14] = (d
>> 8) & 0xff;
2146 arr
[15] = (d
>> 0) & 0xff;
2147 return array_to_constant(mode
, arr
);
2150 /* branch hint stuff */
2152 /* An array of these is used to propagate hints to predecessor blocks. */
2155 rtx prop_jump
; /* propagated from another block */
2156 int bb_index
; /* the original block. */
2158 static struct spu_bb_info
*spu_bb_info
;
2160 #define STOP_HINT_P(INSN) \
2161 (GET_CODE(INSN) == CALL_INSN \
2162 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2163 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2165 /* 1 when RTX is a hinted branch or its target. We keep track of
2166 what has been hinted so the safe-hint code can test it easily. */
2167 #define HINTED_P(RTX) \
2168 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2170 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2171 #define SCHED_ON_EVEN_P(RTX) \
2172 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2174 /* Emit a nop for INSN such that the two will dual issue. This assumes
2175 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2176 We check for TImode to handle a MULTI1 insn which has dual issued its
2177 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2180 emit_nop_for_insn (rtx insn
)
2184 p
= get_pipe (insn
);
2185 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2186 new_insn
= emit_insn_after (gen_lnop (), insn
);
2187 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2189 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2190 PUT_MODE (new_insn
, TImode
);
2191 PUT_MODE (insn
, VOIDmode
);
2194 new_insn
= emit_insn_after (gen_lnop (), insn
);
2195 recog_memoized (new_insn
);
2198 /* Insert nops in basic blocks to meet dual issue alignment
2199 requirements. Also make sure hbrp and hint instructions are at least
2200 one cycle apart, possibly inserting a nop. */
2204 rtx insn
, next_insn
, prev_insn
, hbr_insn
= 0;
2208 /* This sets up INSN_ADDRESSES. */
2209 shorten_branches (get_insns ());
2211 /* Keep track of length added by nops. */
2215 insn
= get_insns ();
2216 if (!active_insn_p (insn
))
2217 insn
= next_active_insn (insn
);
2218 for (; insn
; insn
= next_insn
)
2220 next_insn
= next_active_insn (insn
);
2221 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2222 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2226 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2227 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2228 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2231 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2232 PUT_MODE (prev_insn
, GET_MODE (insn
));
2233 PUT_MODE (insn
, TImode
);
2239 if (INSN_CODE (insn
) == CODE_FOR_blockage
)
2241 if (GET_MODE (insn
) == TImode
)
2242 PUT_MODE (next_insn
, TImode
);
2244 next_insn
= next_active_insn (insn
);
2246 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2247 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2249 if (((addr
+ length
) & 7) != 0)
2251 emit_nop_for_insn (prev_insn
);
2255 else if (GET_MODE (insn
) == TImode
2256 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2257 || get_attr_type (insn
) == TYPE_MULTI0
)
2258 && ((addr
+ length
) & 7) != 0)
2260 /* prev_insn will always be set because the first insn is
2261 always 8-byte aligned. */
2262 emit_nop_for_insn (prev_insn
);
2270 /* Routines for branch hints. */
2273 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
2274 int distance
, sbitmap blocks
)
2276 rtx branch_label
= 0;
2281 if (before
== 0 || branch
== 0 || target
== 0)
2284 /* While scheduling we require hints to be no further than 600, so
2285 we need to enforce that here too */
2289 /* If we have a Basic block note, emit it after the basic block note. */
2290 if (NOTE_KIND (before
) == NOTE_INSN_BASIC_BLOCK
)
2291 before
= NEXT_INSN (before
);
2293 branch_label
= gen_label_rtx ();
2294 LABEL_NUSES (branch_label
)++;
2295 LABEL_PRESERVE_P (branch_label
) = 1;
2296 insn
= emit_label_before (branch_label
, branch
);
2297 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2298 SET_BIT (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2300 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2301 recog_memoized (hint
);
2302 HINTED_P (branch
) = 1;
2304 if (GET_CODE (target
) == LABEL_REF
)
2305 HINTED_P (XEXP (target
, 0)) = 1;
2306 else if (tablejump_p (branch
, 0, &table
))
2310 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2311 vec
= XVEC (PATTERN (table
), 0);
2313 vec
= XVEC (PATTERN (table
), 1);
2314 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2315 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2318 if (distance
>= 588)
2320 /* Make sure the hint isn't scheduled any earlier than this point,
2321 which could make it too far for the branch offest to fit */
2322 recog_memoized (emit_insn_before (gen_blockage (), hint
));
2324 else if (distance
<= 8 * 4)
2326 /* To guarantee at least 8 insns between the hint and branch we
2329 for (d
= distance
; d
< 8 * 4; d
+= 4)
2332 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2333 recog_memoized (insn
);
2336 /* Make sure any nops inserted aren't scheduled before the hint. */
2337 recog_memoized (emit_insn_after (gen_blockage (), hint
));
2339 /* Make sure any nops inserted aren't scheduled after the call. */
2340 if (CALL_P (branch
) && distance
< 8 * 4)
2341 recog_memoized (emit_insn_before (gen_blockage (), branch
));
2345 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2346 the rtx for the branch target. */
2348 get_branch_target (rtx branch
)
2350 if (GET_CODE (branch
) == JUMP_INSN
)
2354 /* Return statements */
2355 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2356 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2359 if (GET_CODE (PATTERN (branch
)) == ADDR_VEC
2360 || GET_CODE (PATTERN (branch
)) == ADDR_DIFF_VEC
)
2363 set
= single_set (branch
);
2364 src
= SET_SRC (set
);
2365 if (GET_CODE (SET_DEST (set
)) != PC
)
2368 if (GET_CODE (src
) == IF_THEN_ELSE
)
2371 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2374 /* If the more probable case is not a fall through, then
2375 try a branch hint. */
2376 HOST_WIDE_INT prob
= INTVAL (XEXP (note
, 0));
2377 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2378 && GET_CODE (XEXP (src
, 1)) != PC
)
2379 lab
= XEXP (src
, 1);
2380 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2381 && GET_CODE (XEXP (src
, 2)) != PC
)
2382 lab
= XEXP (src
, 2);
2386 if (GET_CODE (lab
) == RETURN
)
2387 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2395 else if (GET_CODE (branch
) == CALL_INSN
)
2398 /* All of our call patterns are in a PARALLEL and the CALL is
2399 the first pattern in the PARALLEL. */
2400 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2402 call
= XVECEXP (PATTERN (branch
), 0, 0);
2403 if (GET_CODE (call
) == SET
)
2404 call
= SET_SRC (call
);
2405 if (GET_CODE (call
) != CALL
)
2407 return XEXP (XEXP (call
, 0), 0);
2412 /* The special $hbr register is used to prevent the insn scheduler from
2413 moving hbr insns across instructions which invalidate them. It
2414 should only be used in a clobber, and this function searches for
2415 insns which clobber it. */
2417 insn_clobbers_hbr (rtx insn
)
2420 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2422 rtx parallel
= PATTERN (insn
);
2425 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2427 clobber
= XVECEXP (parallel
, 0, j
);
2428 if (GET_CODE (clobber
) == CLOBBER
2429 && GET_CODE (XEXP (clobber
, 0)) == REG
2430 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2437 /* Search up to 32 insns starting at FIRST:
2438 - at any kind of hinted branch, just return
2439 - at any unconditional branch in the first 15 insns, just return
2440 - at a call or indirect branch, after the first 15 insns, force it to
2441 an even address and return
2442 - at any unconditional branch, after the first 15 insns, force it to
2444 At then end of the search, insert an hbrp within 4 insns of FIRST,
2445 and an hbrp within 16 instructions of FIRST.
2448 insert_hbrp_for_ilb_runout (rtx first
)
2450 rtx insn
, before_4
= 0, before_16
= 0;
2451 int addr
= 0, length
, first_addr
= -1;
2452 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2453 int insert_lnop_after
= 0;
2454 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2457 if (first_addr
== -1)
2458 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2459 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2460 length
= get_attr_length (insn
);
2462 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2464 /* We test for 14 instructions because the first hbrp will add
2465 up to 2 instructions. */
2466 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2469 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2471 /* Make sure an hbrp is at least 2 cycles away from a hint.
2472 Insert an lnop after the hbrp when necessary. */
2473 if (before_4
== 0 && addr
> 0)
2476 insert_lnop_after
|= 1;
2478 else if (before_4
&& addr
<= 4 * 4)
2479 insert_lnop_after
|= 1;
2480 if (before_16
== 0 && addr
> 10 * 4)
2483 insert_lnop_after
|= 2;
2485 else if (before_16
&& addr
<= 14 * 4)
2486 insert_lnop_after
|= 2;
2489 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2491 if (addr
< hbrp_addr0
)
2493 else if (addr
< hbrp_addr1
)
2497 if (CALL_P (insn
) || JUMP_P (insn
))
2499 if (HINTED_P (insn
))
2502 /* Any branch after the first 15 insns should be on an even
2503 address to avoid a special case branch. There might be
2504 some nops and/or hbrps inserted, so we test after 10
2507 SCHED_ON_EVEN_P (insn
) = 1;
2510 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2514 if (addr
+ length
>= 32 * 4)
2516 gcc_assert (before_4
&& before_16
);
2517 if (hbrp_addr0
> 4 * 4)
2520 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2521 recog_memoized (insn
);
2522 INSN_ADDRESSES_NEW (insn
,
2523 INSN_ADDRESSES (INSN_UID (before_4
)));
2524 PUT_MODE (insn
, GET_MODE (before_4
));
2525 PUT_MODE (before_4
, TImode
);
2526 if (insert_lnop_after
& 1)
2528 insn
= emit_insn_before (gen_lnop (), before_4
);
2529 recog_memoized (insn
);
2530 INSN_ADDRESSES_NEW (insn
,
2531 INSN_ADDRESSES (INSN_UID (before_4
)));
2532 PUT_MODE (insn
, TImode
);
2535 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2536 && hbrp_addr1
> 16 * 4)
2539 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2540 recog_memoized (insn
);
2541 INSN_ADDRESSES_NEW (insn
,
2542 INSN_ADDRESSES (INSN_UID (before_16
)));
2543 PUT_MODE (insn
, GET_MODE (before_16
));
2544 PUT_MODE (before_16
, TImode
);
2545 if (insert_lnop_after
& 2)
2547 insn
= emit_insn_before (gen_lnop (), before_16
);
2548 recog_memoized (insn
);
2549 INSN_ADDRESSES_NEW (insn
,
2550 INSN_ADDRESSES (INSN_UID
2552 PUT_MODE (insn
, TImode
);
2558 else if (BARRIER_P (insn
))
2563 /* The SPU might hang when it executes 48 inline instructions after a
2564 hinted branch jumps to its hinted target. The beginning of a
2565 function and the return from a call might have been hinted, and must
2566 be handled as well. To prevent a hang we insert 2 hbrps. The first
2567 should be within 6 insns of the branch target. The second should be
2568 within 22 insns of the branch target. When determining if hbrps are
2569 necessary, we look for only 32 inline instructions, because up to to
2570 12 nops and 4 hbrps could be inserted. Similarily, when inserting
2571 new hbrps, we insert them within 4 and 16 insns of the target. */
2576 if (TARGET_SAFE_HINTS
)
2578 shorten_branches (get_insns ());
2579 /* Insert hbrp at beginning of function */
2580 insn
= next_active_insn (get_insns ());
2582 insert_hbrp_for_ilb_runout (insn
);
2583 /* Insert hbrp after hinted targets. */
2584 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2585 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2586 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2590 static int in_spu_reorg
;
2592 /* Insert branch hints. There are no branch optimizations after this
2593 pass, so it's safe to set our branch hints now. */
2595 spu_machine_dependent_reorg (void)
2600 rtx branch_target
= 0;
2601 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2605 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2607 /* We still do it for unoptimized code because an external
2608 function might have hinted a call or return. */
2614 blocks
= sbitmap_alloc (last_basic_block
);
2615 sbitmap_zero (blocks
);
2618 compute_bb_for_insn ();
2623 (struct spu_bb_info
*) xcalloc (n_basic_blocks
,
2624 sizeof (struct spu_bb_info
));
2626 /* We need exact insn addresses and lengths. */
2627 shorten_branches (get_insns ());
2629 for (i
= n_basic_blocks
- 1; i
>= 0; i
--)
2631 bb
= BASIC_BLOCK (i
);
2633 if (spu_bb_info
[i
].prop_jump
)
2635 branch
= spu_bb_info
[i
].prop_jump
;
2636 branch_target
= get_branch_target (branch
);
2637 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2638 required_dist
= spu_hint_dist
;
2640 /* Search from end of a block to beginning. In this loop, find
2641 jumps which need a branch and emit them only when:
2642 - it's an indirect branch and we're at the insn which sets
2644 - we're at an insn that will invalidate the hint. e.g., a
2645 call, another hint insn, inline asm that clobbers $hbr, and
2646 some inlined operations (divmodsi4). Don't consider jumps
2647 because they are only at the end of a block and are
2648 considered when we are deciding whether to propagate
2649 - we're getting too far away from the branch. The hbr insns
2650 only have a signed 10 bit offset
2651 We go back as far as possible so the branch will be considered
2652 for propagation when we get to the beginning of the block. */
2653 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2657 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2659 && ((GET_CODE (branch_target
) == REG
2660 && set_of (branch_target
, insn
) != NULL_RTX
)
2661 || insn_clobbers_hbr (insn
)
2662 || branch_addr
- insn_addr
> 600))
2664 rtx next
= NEXT_INSN (insn
);
2665 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2666 if (insn
!= BB_END (bb
)
2667 && branch_addr
- next_addr
>= required_dist
)
2671 "hint for %i in block %i before %i\n",
2672 INSN_UID (branch
), bb
->index
,
2674 spu_emit_branch_hint (next
, branch
, branch_target
,
2675 branch_addr
- next_addr
, blocks
);
2680 /* JUMP_P will only be true at the end of a block. When
2681 branch is already set it means we've previously decided
2682 to propagate a hint for that branch into this block. */
2683 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2686 if ((branch_target
= get_branch_target (insn
)))
2689 branch_addr
= insn_addr
;
2690 required_dist
= spu_hint_dist
;
2694 if (insn
== BB_HEAD (bb
))
2700 /* If we haven't emitted a hint for this branch yet, it might
2701 be profitable to emit it in one of the predecessor blocks,
2702 especially for loops. */
2704 basic_block prev
= 0, prop
= 0, prev2
= 0;
2705 int loop_exit
= 0, simple_loop
= 0;
2706 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2708 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2709 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2710 prev
= EDGE_PRED (bb
, j
)->src
;
2712 prev2
= EDGE_PRED (bb
, j
)->src
;
2714 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2715 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2717 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2720 /* If this branch is a loop exit then propagate to previous
2721 fallthru block. This catches the cases when it is a simple
2722 loop or when there is an initial branch into the loop. */
2723 if (prev
&& (loop_exit
|| simple_loop
)
2724 && prev
->loop_depth
<= bb
->loop_depth
)
2727 /* If there is only one adjacent predecessor. Don't propagate
2728 outside this loop. This loop_depth test isn't perfect, but
2729 I'm not sure the loop_father member is valid at this point. */
2730 else if (prev
&& single_pred_p (bb
)
2731 && prev
->loop_depth
== bb
->loop_depth
)
2734 /* If this is the JOIN block of a simple IF-THEN then
2735 propogate the hint to the HEADER block. */
2736 else if (prev
&& prev2
2737 && EDGE_COUNT (bb
->preds
) == 2
2738 && EDGE_COUNT (prev
->preds
) == 1
2739 && EDGE_PRED (prev
, 0)->src
== prev2
2740 && prev2
->loop_depth
== bb
->loop_depth
2741 && GET_CODE (branch_target
) != REG
)
2744 /* Don't propagate when:
2745 - this is a simple loop and the hint would be too far
2746 - this is not a simple loop and there are 16 insns in
2748 - the predecessor block ends in a branch that will be
2750 - the predecessor block ends in an insn that invalidates
2754 && (bbend
= BB_END (prop
))
2755 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2756 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2757 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2760 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2761 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2762 bb
->index
, prop
->index
, bb
->loop_depth
,
2763 INSN_UID (branch
), loop_exit
, simple_loop
,
2764 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2766 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2767 spu_bb_info
[prop
->index
].bb_index
= i
;
2769 else if (branch_addr
- next_addr
>= required_dist
)
2772 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2773 INSN_UID (branch
), bb
->index
,
2774 INSN_UID (NEXT_INSN (insn
)));
2775 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2776 branch_addr
- next_addr
, blocks
);
2783 if (!sbitmap_empty_p (blocks
))
2784 find_many_sub_basic_blocks (blocks
);
2786 /* We have to schedule to make sure alignment is ok. */
2787 FOR_EACH_BB (bb
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2789 /* The hints need to be scheduled, so call it again. */
2796 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2797 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2799 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2800 between its branch label and the branch . We don't move the
2801 label because GCC expects it at the beginning of the block. */
2802 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2803 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2804 rtx label
= XEXP (label_ref
, 0);
2807 for (branch
= NEXT_INSN (label
);
2808 !JUMP_P (branch
) && !CALL_P (branch
);
2809 branch
= NEXT_INSN (branch
))
2810 if (NONJUMP_INSN_P (branch
))
2811 offset
+= get_attr_length (branch
);
2813 XVECEXP (unspec
, 0, 0) = plus_constant (label_ref
, offset
);
2816 if (spu_flag_var_tracking
)
2819 timevar_push (TV_VAR_TRACKING
);
2820 variable_tracking_main ();
2821 timevar_pop (TV_VAR_TRACKING
);
2822 df_finish_pass (false);
2825 free_bb_for_insn ();
2831 /* Insn scheduling routines, primarily for dual issue. */
2833 spu_sched_issue_rate (void)
2839 uses_ls_unit(rtx insn
)
2841 rtx set
= single_set (insn
);
2843 && (GET_CODE (SET_DEST (set
)) == MEM
2844 || GET_CODE (SET_SRC (set
)) == MEM
))
2853 /* Handle inline asm */
2854 if (INSN_CODE (insn
) == -1)
2856 t
= get_attr_type (insn
);
2881 case TYPE_IPREFETCH
:
2889 /* haifa-sched.c has a static variable that keeps track of the current
2890 cycle. It is passed to spu_sched_reorder, and we record it here for
2891 use by spu_sched_variable_issue. It won't be accurate if the
2892 scheduler updates it's clock_var between the two calls. */
2893 static int clock_var
;
2895 /* This is used to keep track of insn alignment. Set to 0 at the
2896 beginning of each block and increased by the "length" attr of each
insn scheduled. */
2898 static int spu_sched_length
;
2900 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2901 ready list appropriately in spu_sched_reorder(). */
2902 static int pipe0_clock
;
2903 static int pipe1_clock
;
/* Clock value at which the previous insn was issued.  Compared against
   the current clock in spu_sched_variable_issue to detect two insns
   issued in the same cycle (dual issue), and consulted again by
   spu_sched_reorder when deciding whether to advance the cycle.  */
2905 static int prev_clock_var
;
/* INSN_PRIORITY of the most recently issued insn, recorded in
   spu_sched_variable_issue.  Presumably compared against ready-list
   priorities in spu_sched_reorder's dual-issue nop heuristic -- the
   comparison site falls outside this excerpt; confirm.  */
2907 static int prev_priority
;
2909 /* The SPU needs to load the next ilb sometime during the execution of
2910 the previous ilb. There is a potential conflict if every cycle has a
2911 load or store. To avoid the conflict we make sure the load/store
2912 unit is free for at least one cycle during the execution of insns in
2913 the previous ilb. */
2914 static int spu_ls_first
;
2915 static int prev_ls_clock
;
2918 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2919 int max_ready ATTRIBUTE_UNUSED
)
2921 spu_sched_length
= 0;
2925 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2926 int max_ready ATTRIBUTE_UNUSED
)
2928 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2930 /* When any block might be at least 8-byte aligned, assume they
2931 will all be at least 8-byte aligned to make sure dual issue
2932 works out correctly. */
2933 spu_sched_length
= 0;
2935 spu_ls_first
= INT_MAX
;
2940 prev_clock_var
= -1;
2945 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2946 int verbose ATTRIBUTE_UNUSED
, rtx insn
, int more
)
2950 if (GET_CODE (PATTERN (insn
)) == USE
2951 || GET_CODE (PATTERN (insn
)) == CLOBBER
2952 || (len
= get_attr_length (insn
)) == 0)
2955 spu_sched_length
+= len
;
2957 /* Reset on inline asm */
2958 if (INSN_CODE (insn
) == -1)
2960 spu_ls_first
= INT_MAX
;
2965 p
= get_pipe (insn
);
2967 pipe0_clock
= clock_var
;
2969 pipe1_clock
= clock_var
;
2973 if (clock_var
- prev_ls_clock
> 1
2974 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2975 spu_ls_first
= INT_MAX
;
2976 if (uses_ls_unit (insn
))
2978 if (spu_ls_first
== INT_MAX
)
2979 spu_ls_first
= spu_sched_length
;
2980 prev_ls_clock
= clock_var
;
2983 /* The scheduler hasn't inserted the nop, but we will later on.
2984 Include those nops in spu_sched_length. */
2985 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2986 spu_sched_length
+= 4;
2987 prev_clock_var
= clock_var
;
2989 /* more is -1 when called from spu_sched_reorder for new insns
2990 that don't have INSN_PRIORITY */
2992 prev_priority
= INSN_PRIORITY (insn
);
2995 /* Always try issueing more insns. spu_sched_reorder will decide
2996 when the cycle should be advanced. */
3000 /* This function is called for both TARGET_SCHED_REORDER and
3001 TARGET_SCHED_REORDER2. */
3003 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
3004 rtx
*ready
, int *nreadyp
, int clock
)
3006 int i
, nready
= *nreadyp
;
3007 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
3012 if (nready
<= 0 || pipe1_clock
>= clock
)
3015 /* Find any rtl insns that don't generate assembly insns and schedule
3017 for (i
= nready
- 1; i
>= 0; i
--)
3020 if (INSN_CODE (insn
) == -1
3021 || INSN_CODE (insn
) == CODE_FOR_blockage
3022 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
3024 ready
[i
] = ready
[nready
- 1];
3025 ready
[nready
- 1] = insn
;
3030 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
3031 for (i
= 0; i
< nready
; i
++)
3032 if (INSN_CODE (ready
[i
]) != -1)
3035 switch (get_attr_type (insn
))
3060 case TYPE_IPREFETCH
:
3066 /* In the first scheduling phase, schedule loads and stores together
3067 to increase the chance they will get merged during postreload CSE. */
3068 if (!reload_completed
&& pipe_ls
>= 0)
3070 insn
= ready
[pipe_ls
];
3071 ready
[pipe_ls
] = ready
[nready
- 1];
3072 ready
[nready
- 1] = insn
;
3076 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3080 /* When we have loads/stores in every cycle of the last 15 insns and
3081 we are about to schedule another load/store, emit an hbrp insn
3084 && spu_sched_length
- spu_ls_first
>= 4 * 15
3085 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
3087 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3088 recog_memoized (insn
);
3089 if (pipe0_clock
< clock
)
3090 PUT_MODE (insn
, TImode
);
3091 spu_sched_variable_issue (file
, verbose
, insn
, -1);
3095 /* In general, we want to emit nops to increase dual issue, but dual
3096 issue isn't faster when one of the insns could be scheduled later
3097 without effecting the critical path. We look at INSN_PRIORITY to
3098 make a good guess, but it isn't perfect so -mdual-nops=n can be
3099 used to effect it. */
3100 if (in_spu_reorg
&& spu_dual_nops
< 10)
3102 /* When we are at an even address and we are not issueing nops to
3103 improve scheduling then we need to advance the cycle. */
3104 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
3105 && (spu_dual_nops
== 0
3108 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
3111 /* When at an odd address, schedule the highest priority insn
3112 without considering pipeline. */
3113 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
3114 && (spu_dual_nops
== 0
3116 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
3121 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3122 pipe0 insn in the ready list, schedule it. */
3123 if (pipe0_clock
< clock
&& pipe_0
>= 0)
3124 schedule_i
= pipe_0
;
3126 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3127 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3129 schedule_i
= pipe_1
;
3131 if (schedule_i
> -1)
3133 insn
= ready
[schedule_i
];
3134 ready
[schedule_i
] = ready
[nready
- 1];
3135 ready
[nready
- 1] = insn
;
3141 /* INSN is dependent on DEP_INSN. */
3143 spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
3147 /* The blockage pattern is used to prevent instructions from being
3148 moved across it and has no cost. */
3149 if (INSN_CODE (insn
) == CODE_FOR_blockage
3150 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3153 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
3154 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3157 /* Make sure hbrps are spread out. */
3158 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3159 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3162 /* Make sure hints and hbrps are 2 cycles apart. */
3163 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3164 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3165 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3166 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3169 /* An hbrp has no real dependency on other insns. */
3170 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3171 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3174 /* Assuming that it is unlikely an argument register will be used in
3175 the first cycle of the called function, we reduce the cost for
3176 slightly better scheduling of dep_insn. When not hinted, the
3177 mispredicted branch would hide the cost as well. */
3180 rtx target
= get_branch_target (insn
);
3181 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3186 /* And when returning from a function, let's assume the return values
3187 are completed sooner too. */
3188 if (CALL_P (dep_insn
))
3191 /* Make sure an instruction that loads from the back chain is schedule
3192 away from the return instruction so a hint is more likely to get
3194 if (INSN_CODE (insn
) == CODE_FOR__return
3195 && (set
= single_set (dep_insn
))
3196 && GET_CODE (SET_DEST (set
)) == REG
3197 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3200 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3201 scheduler makes every insn in a block anti-dependent on the final
3202 jump_insn. We adjust here so higher cost insns will get scheduled
3204 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3205 return insn_cost (dep_insn
) - 3;
3210 /* Create a CONST_DOUBLE from a string.
   STRING is parsed as a floating-point literal in MODE via
   REAL_VALUE_ATOF, and the resulting REAL_VALUE_TYPE is wrapped in a
   CONST_DOUBLE rtx of that mode.  */
3212 spu_float_const (const char *string
, enum machine_mode mode
)
3214 REAL_VALUE_TYPE value
;
3215 value
= REAL_VALUE_ATOF (string
, mode
);
3216 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
/* Return nonzero when X is an rtx code that can stand for a constant
   address: LABEL_REF, SYMBOL_REF, CONST_INT, CONST or HIGH.  */
3220 spu_constant_address_p (rtx x
)
3222 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
3223 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
3224 || GET_CODE (x
) == HIGH
);
/* Classify which immediate-load instruction can materialize VAL.
   VAL must already be truncated to SImode (asserted below).  The
   ranges tested, in order: 16-bit signed, 18-bit unsigned, identical
   high and low halfwords, low halfword zero.  NOTE(review): the
   enum spu_immediate value returned for each range lies outside this
   excerpt; confirm against the spu_immediate definition.  */
3227 static enum spu_immediate
3228 which_immediate_load (HOST_WIDE_INT val
)
3230 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
/* Fits in a 16-bit signed immediate.  */
3232 if (val
>= -0x8000 && val
<= 0x7fff)
/* Fits in an 18-bit unsigned immediate.  */
3234 if (val
>= 0 && val
<= 0x3ffff)
/* High and low halfwords are identical.  */
3236 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
/* Low halfword is zero.  */
3238 if ((val
& 0xffff) == 0)
3244 /* Return true when OP can be loaded by one of the il instructions, or
3245 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3247 immediate_load_p (rtx op
, enum machine_mode mode
)
3249 if (CONSTANT_P (op
))
3251 enum immediate_class c
= classify_immediate (op
, mode
)
/* IC_IL1/IC_IL1s load in a single insn.  IC_IL2/IC_IL2s need the
   two-insn ilhu + iohl sequence (see header comment) and so are only
   accepted while !epilogue_completed, i.e. before flow2.  */
;
3252 return c
== IC_IL1
|| c
== IC_IL1s
3253 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
3258 /* Return true if the first SIZE bytes of arr is a constant that can be
3259 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3260 represent the size and offset of the instruction to use. */
3262 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3264 int cpat
, run
, i
, start
;
3268 for (i
= 0; i
< size
&& cpat
; i
++)
3276 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3278 else if (arr
[i
] == 0)
3280 while (arr
[i
+run
] == run
&& i
+run
< 16)
3282 if (run
!= 4 && run
!= 8)
3287 if ((i
& (run
-1)) != 0)
3294 if (cpat
&& (run
|| size
< 16))
3301 *pstart
= start
== -1 ? 16-run
: start
;
3307 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3308 it into a register. MODE is only valid when OP is a CONST_INT. */
3309 static enum immediate_class
3310 classify_immediate (rtx op
, enum machine_mode mode
)
3313 unsigned char arr
[16];
3314 int i
, j
, repeated
, fsmbi
, repeat
;
3316 gcc_assert (CONSTANT_P (op
));
3318 if (GET_MODE (op
) != VOIDmode
)
3319 mode
= GET_MODE (op
);
3321 /* A V4SI const_vector with all identical symbols is ok. */
3324 && GET_CODE (op
) == CONST_VECTOR
3325 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3326 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3327 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3328 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3329 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3330 op
= CONST_VECTOR_ELT (op
, 0);
3332 switch (GET_CODE (op
))
3336 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3339 /* We can never know if the resulting address fits in 18 bits and can be
3340 loaded with ila. For now, assume the address will not overflow if
3341 the displacement is "small" (fits 'K' constraint). */
3342 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3344 rtx sym
= XEXP (XEXP (op
, 0), 0);
3345 rtx cst
= XEXP (XEXP (op
, 0), 1);
3347 if (GET_CODE (sym
) == SYMBOL_REF
3348 && GET_CODE (cst
) == CONST_INT
3349 && satisfies_constraint_K (cst
))
3358 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3359 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3360 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3366 constant_to_array (mode
, op
, arr
);
3368 /* Check that each 4-byte slot is identical. */
3370 for (i
= 4; i
< 16; i
+= 4)
3371 for (j
= 0; j
< 4; j
++)
3372 if (arr
[j
] != arr
[i
+ j
])
3377 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3378 val
= trunc_int_for_mode (val
, SImode
);
3380 if (which_immediate_load (val
) != SPU_NONE
)
3384 /* Any mode of 2 bytes or smaller can be loaded with an il
3386 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3390 for (i
= 0; i
< 16 && fsmbi
; i
++)
3391 if (arr
[i
] != 0 && repeat
== 0)
3393 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3396 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3398 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
/* Classify which logical-immediate instruction can take VAL as its
   operand.  VAL must already be truncated to SImode (asserted).  The
   word form is tried first; when both halfwords (and then both bytes)
   are identical, VAL is re-truncated to HImode and QImode to try the
   narrower forms.  NOTE(review): the enum spu_immediate value
   returned for each case lies outside this excerpt; confirm against
   the spu_immediate definition.  */
3411 static enum spu_immediate
3412 which_logical_immediate (HOST_WIDE_INT val
)
3414 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
/* Fits the 10-bit signed range (-0x200 .. 0x1ff), word form.  */
3416 if (val
>= -0x200 && val
<= 0x1ff)
/* Fits in 16 unsigned bits.  */
3418 if (val
>= 0 && val
<= 0xffff)
/* High and low halfwords identical: try the halfword form.  */
3420 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3422 val
= trunc_int_for_mode (val
, HImode
);
/* 10-bit signed range, halfword form.  */
3423 if (val
>= -0x200 && val
<= 0x1ff)
/* Both bytes of the halfword identical: try the byte form.  */
3425 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3427 val
= trunc_int_for_mode (val
, QImode
);
/* 10-bit signed range, byte form.  */
3428 if (val
>= -0x200 && val
<= 0x1ff)
3435 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs; any other element kind disqualifies it.  */
3438 const_vector_immediate_p (rtx x
)
3441 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
/* Scan every element; reject on the first one that is neither a
   CONST_INT nor a CONST_DOUBLE.  */
3442 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
3443 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
3444 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
3450 logical_immediate_p (rtx op
, enum machine_mode mode
)
3453 unsigned char arr
[16];
3456 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3457 || GET_CODE (op
) == CONST_VECTOR
);
3459 if (GET_CODE (op
) == CONST_VECTOR
3460 && !const_vector_immediate_p (op
))
3463 if (GET_MODE (op
) != VOIDmode
)
3464 mode
= GET_MODE (op
);
3466 constant_to_array (mode
, op
, arr
);
3468 /* Check that bytes are repeated. */
3469 for (i
= 4; i
< 16; i
+= 4)
3470 for (j
= 0; j
< 4; j
++)
3471 if (arr
[j
] != arr
[i
+ j
])
3474 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3475 val
= trunc_int_for_mode (val
, SImode
);
3477 i
= which_logical_immediate (val
);
3478 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
3482 iohl_immediate_p (rtx op
, enum machine_mode mode
)
3485 unsigned char arr
[16];
3488 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3489 || GET_CODE (op
) == CONST_VECTOR
);
3491 if (GET_CODE (op
) == CONST_VECTOR
3492 && !const_vector_immediate_p (op
))
3495 if (GET_MODE (op
) != VOIDmode
)
3496 mode
= GET_MODE (op
);
3498 constant_to_array (mode
, op
, arr
);
3500 /* Check that bytes are repeated. */
3501 for (i
= 4; i
< 16; i
+= 4)
3502 for (j
= 0; j
< 4; j
++)
3503 if (arr
[j
] != arr
[i
+ j
])
3506 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3507 val
= trunc_int_for_mode (val
, SImode
);
3509 return val
>= 0 && val
<= 0xffff;
3513 arith_immediate_p (rtx op
, enum machine_mode mode
,
3514 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3517 unsigned char arr
[16];
3520 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3521 || GET_CODE (op
) == CONST_VECTOR
);
3523 if (GET_CODE (op
) == CONST_VECTOR
3524 && !const_vector_immediate_p (op
))
3527 if (GET_MODE (op
) != VOIDmode
)
3528 mode
= GET_MODE (op
);
3530 constant_to_array (mode
, op
, arr
);
3532 if (VECTOR_MODE_P (mode
))
3533 mode
= GET_MODE_INNER (mode
);
3535 bytes
= GET_MODE_SIZE (mode
);
3536 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3538 /* Check that bytes are repeated. */
3539 for (i
= bytes
; i
< 16; i
+= bytes
)
3540 for (j
= 0; j
< bytes
; j
++)
3541 if (arr
[j
] != arr
[i
+ j
])
3545 for (j
= 1; j
< bytes
; j
++)
3546 val
= (val
<< 8) | arr
[j
];
3548 val
= trunc_int_for_mode (val
, mode
);
3550 return val
>= low
&& val
<= high
;
3553 /* TRUE when op is an immediate and an exact power of 2, and given that
3554 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3555 all entries must be the same. */
3557 exp2_immediate_p (rtx op
, enum machine_mode mode
, int low
, int high
)
3559 enum machine_mode int_mode
;
3561 unsigned char arr
[16];
3564 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3565 || GET_CODE (op
) == CONST_VECTOR
);
3567 if (GET_CODE (op
) == CONST_VECTOR
3568 && !const_vector_immediate_p (op
))
3571 if (GET_MODE (op
) != VOIDmode
)
3572 mode
= GET_MODE (op
);
3574 constant_to_array (mode
, op
, arr
);
3576 if (VECTOR_MODE_P (mode
))
3577 mode
= GET_MODE_INNER (mode
);
3579 bytes
= GET_MODE_SIZE (mode
);
3580 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3582 /* Check that bytes are repeated. */
3583 for (i
= bytes
; i
< 16; i
+= bytes
)
3584 for (j
= 0; j
< bytes
; j
++)
3585 if (arr
[j
] != arr
[i
+ j
])
3589 for (j
= 1; j
< bytes
; j
++)
3590 val
= (val
<< 8) | arr
[j
];
3592 val
= trunc_int_for_mode (val
, int_mode
);
3594 /* Currently, we only handle SFmode */
3595 gcc_assert (mode
== SFmode
);
3598 int exp
= (val
>> 23) - 127;
3599 return val
> 0 && (val
& 0x007fffff) == 0
3600 && exp
>= low
&& exp
<= high
;
3606 - any 32-bit constant (SImode, SFmode)
3607 - any constant that can be generated with fsmbi (any mode)
3608 - a 64-bit constant where the high and low bits are identical
3610 - a 128-bit constant where the four 32-bit words match. */
3612 spu_legitimate_constant_p (rtx x
)
3614 if (GET_CODE (x
) == HIGH
)
3616 /* V4SI with all identical symbols is valid. */
3618 && GET_MODE (x
) == V4SImode
3619 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3620 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3621 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3622 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3623 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3624 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3626 if (GET_CODE (x
) == CONST_VECTOR
3627 && !const_vector_immediate_p (x
))
3632 /* Valid address are:
3633 - symbol_ref, label_ref, const
3635 - reg + const_int, where const_int is 16 byte aligned
3636 - reg + reg, alignment doesn't matter
3637 The alignment matters in the reg+const case because lqd and stqd
3638 ignore the 4 least significant bits of the const. We only care about
3639 16 byte modes because the expand phase will change all smaller MEM
3640 references to TImode. */
3642 spu_legitimate_address_p (enum machine_mode mode
,
3643 rtx x
, bool reg_ok_strict
)
3645 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3647 && GET_CODE (x
) == AND
3648 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3649 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3651 switch (GET_CODE (x
))
3656 return !TARGET_LARGE_MEM
;
3659 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3667 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3672 rtx op0
= XEXP (x
, 0);
3673 rtx op1
= XEXP (x
, 1);
3674 if (GET_CODE (op0
) == SUBREG
)
3675 op0
= XEXP (op0
, 0);
3676 if (GET_CODE (op1
) == SUBREG
)
3677 op1
= XEXP (op1
, 0);
3678 if (GET_CODE (op0
) == REG
3679 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3680 && GET_CODE (op1
) == CONST_INT
3681 && INTVAL (op1
) >= -0x2000
3682 && INTVAL (op1
) <= 0x1fff
3683 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3685 if (GET_CODE (op0
) == REG
3686 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3687 && GET_CODE (op1
) == REG
3688 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
3699 /* When the address is reg + const_int, force the const_int into a
   register.  */
3702 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3703 enum machine_mode mode ATTRIBUTE_UNUSED
)
3706 /* Make sure both operands are registers.
   NOTE(review): the extraction of op0/op1 from XEXP (x, 0) and
   XEXP (x, 1) falls outside this excerpt.  */
3707 if (GET_CODE (x
) == PLUS
)
/* A 16-byte-aligned symbol is forced into a register that is marked
   as a pointer with alignment 128 -- presumably bits, i.e. 16 bytes;
   confirm against mark_reg_pointer's units.  */
3711 if (ALIGNED_SYMBOL_REF_P (op0
))
3713 op0
= force_reg (Pmode
, op0
);
3714 mark_reg_pointer (op0
, 128);
/* Any other non-register operand just gets forced into a reg.  */
3716 else if (GET_CODE (op0
) != REG
)
3717 op0
= force_reg (Pmode
, op0
);
/* Same treatment for the second operand.  */
3718 if (ALIGNED_SYMBOL_REF_P (op1
))
3720 op1
= force_reg (Pmode
, op1
);
3721 mark_reg_pointer (op1
, 128);
3723 else if (GET_CODE (op1
) != REG
)
3724 op1
= force_reg (Pmode
, op1
);
/* Rebuild the sum as a reg + reg address.  */
3725 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3730 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3731 struct attribute_spec.handler. */
3733 spu_handle_fndecl_attribute (tree
* node
,
3735 tree args ATTRIBUTE_UNUSED
,
3736 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
/* The attribute is only meaningful on functions: anything else gets a
   warning and the attribute is dropped via *no_add_attrs.  */
3738 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3740 warning (0, "%qE attribute only applies to functions",
3742 *no_add_attrs
= true;
3748 /* Handle the "vector" attribute. */
3750 spu_handle_vector_attribute (tree
* node
, tree name
,
3751 tree args ATTRIBUTE_UNUSED
,
3752 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3754 tree type
= *node
, result
= NULL_TREE
;
3755 enum machine_mode mode
;
3758 while (POINTER_TYPE_P (type
)
3759 || TREE_CODE (type
) == FUNCTION_TYPE
3760 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3761 type
= TREE_TYPE (type
);
3763 mode
= TYPE_MODE (type
);
3765 unsigned_p
= TYPE_UNSIGNED (type
);
3769 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3772 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3775 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3778 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3781 result
= V4SF_type_node
;
3784 result
= V2DF_type_node
;
3790 /* Propagate qualifiers attached to the element type
3791 onto the vector type. */
3792 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3793 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3795 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3798 warning (0, "%qE attribute ignored", name
);
3800 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
3805 /* Return nonzero if FUNC is a naked function, i.e. carries a "naked"
   attribute in its DECL_ATTRIBUTES.  */
3807 spu_naked_function_p (tree func
)
/* Non-FUNCTION_DECLs are rejected up front -- the handling on the
   dropped line is presumably an abort; confirm.  */
3811 if (TREE_CODE (func
) != FUNCTION_DECL
)
3814 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3815 return a
!= NULL_TREE
;
/* Return the offset used when eliminating register FROM into register
   TO (the INITIAL_ELIMINATION_OFFSET target macro).  */
3819 spu_initial_elimination_offset (int from
, int to
)
3821 int saved_regs_size
= spu_saved_regs_size ();
/* Non-leaf functions, or frames with outgoing args, local frame space
   or saved registers, need the standard stack-pointer offset.  */
3823 if (!current_function_is_leaf
|| crtl
->outgoing_args_size
3824 || get_frame_size () || saved_regs_size
)
3825 sp_offset
= STACK_POINTER_OFFSET
;
/* Frame pointer -> stack pointer: locals + outgoing args + offset.  */
3826 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3827 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
/* Frame pointer -> hard frame pointer: just the local frame.  */
3828 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3829 return get_frame_size ();
/* Arg pointer -> stack pointer: everything between them, including
   the saved-register area and the fixed STACK_POINTER_OFFSET.  */
3830 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3831 return sp_offset
+ crtl
->outgoing_args_size
3832 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
/* Arg pointer -> hard frame pointer: locals + saved regs + offset.  */
3833 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3834 return get_frame_size () + saved_regs_size
+ sp_offset
;
3840 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3842 enum machine_mode mode
= TYPE_MODE (type
);
3843 int byte_size
= ((mode
== BLKmode
)
3844 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3846 /* Make sure small structs are left justified in a register. */
3847 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3848 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3850 enum machine_mode smode
;
3853 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3854 int n
= byte_size
/ UNITS_PER_WORD
;
3855 v
= rtvec_alloc (nregs
);
3856 for (i
= 0; i
< n
; i
++)
3858 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3859 gen_rtx_REG (TImode
,
3862 GEN_INT (UNITS_PER_WORD
* i
));
3863 byte_size
-= UNITS_PER_WORD
;
3871 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3873 gen_rtx_EXPR_LIST (VOIDmode
,
3874 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3875 GEN_INT (UNITS_PER_WORD
* n
));
3877 return gen_rtx_PARALLEL (mode
, v
);
3879 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3883 spu_function_arg (CUMULATIVE_ARGS cum
,
3884 enum machine_mode mode
,
3885 tree type
, int named ATTRIBUTE_UNUSED
)
3889 if (cum
>= MAX_REGISTER_ARGS
)
3892 byte_size
= ((mode
== BLKmode
)
3893 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3895 /* The ABI does not allow parameters to be passed partially in
3896 reg and partially in stack. */
3897 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3900 /* Make sure small structs are left justified in a register. */
3901 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3902 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3904 enum machine_mode smode
;
3908 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3909 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3910 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
3912 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3915 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
3918 /* Variable sized types are passed by reference. */
3920 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
3921 enum machine_mode mode ATTRIBUTE_UNUSED
,
3922 const_tree type
, bool named ATTRIBUTE_UNUSED
)
/* A type whose TYPE_SIZE is not a compile-time INTEGER_CST has
   variable size and therefore must be passed by reference.  */
3924 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3930 /* Create and return the va_list datatype.
3932 On SPU, va_list is an array type equivalent to
3934 typedef struct __va_list_tag
3936 void *__args __attribute__((__aligned(16)));
3937 void *__skip __attribute__((__aligned(16)));
3941 where __args points to the arg that will be returned by the next
3942 va_arg(), and __skip points to the previous stack frame such that
3943 when __args == __skip we should advance __args by 32 bytes. */
3945 spu_build_builtin_va_list (void)
3947 tree f_args
, f_skip
, record
, type_decl
;
3950 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3953 build_decl (BUILTINS_LOCATION
,
3954 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3956 f_args
= build_decl (BUILTINS_LOCATION
,
3957 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3958 f_skip
= build_decl (BUILTINS_LOCATION
,
3959 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3961 DECL_FIELD_CONTEXT (f_args
) = record
;
3962 DECL_ALIGN (f_args
) = 128;
3963 DECL_USER_ALIGN (f_args
) = 1;
3965 DECL_FIELD_CONTEXT (f_skip
) = record
;
3966 DECL_ALIGN (f_skip
) = 128;
3967 DECL_USER_ALIGN (f_skip
) = 1;
3969 TREE_CHAIN (record
) = type_decl
;
3970 TYPE_NAME (record
) = type_decl
;
3971 TYPE_FIELDS (record
) = f_args
;
3972 TREE_CHAIN (f_args
) = f_skip
;
3974 /* We know this is being padded and we want it too. It is an internal
3975 type so hide the warnings from the user. */
3977 warn_padded
= false;
3979 layout_type (record
);
3983 /* The correct type is an array type of one element. */
3984 return build_array_type (record
, build_index_type (size_zero_node
));
3987 /* Implement va_start by filling the va_list structure VALIST.
3988 NEXTARG points to the first anonymous stack argument.
3990 The following global variables are used to initialize
3991 the va_list structure:
3994 the CUMULATIVE_ARGS for this function
3996 crtl->args.arg_offset_rtx:
3997 holds the offset of the first anonymous stack argument
3998 (relative to the virtual arg pointer). */
/* NOTE(review): extraction dropped the "args =" / "skip =" LHS of the
   two build3 COMPONENT_REF lines and the local declarations of args,
   skip and t — the bodies below read those variables.  Confirm
   against the pristine file.  */
4001 spu_va_start (tree valist
, rtx nextarg
)
4003 tree f_args
, f_skip
;
/* Fetch the two FIELD_DECLs (__args, __skip) out of the va_list
   record built by spu_build_builtin_va_list.  */
4006 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4007 f_skip
= TREE_CHAIN (f_args
);
4009 valist
= build_va_arg_indirect_ref (valist
);
4011 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4013 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4015 /* Find the __args area. */
4016 t
= make_tree (TREE_TYPE (args
), nextarg
);
4017 if (crtl
->args
.pretend_args_size
> 0)
4018 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (args
), t
,
4019 size_int (-STACK_POINTER_OFFSET
));
4020 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
4021 TREE_SIDE_EFFECTS (t
) = 1;
4022 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4024 /* Find the __skip area. */
4025 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4026 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (skip
), t
,
4027 size_int (crtl
->args
.pretend_args_size
4028 - STACK_POINTER_OFFSET
));
4029 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
4030 TREE_SIDE_EFFECTS (t
) = 1;
4031 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4034 /* Gimplify va_arg by updating the va_list structure
4035 VALIST as required to retrieve an argument of type
4036 TYPE, and returning that argument.
4038 ret = va_arg(VALIST, TYPE);
4040 generates code equivalent to:
4042 paddedsize = (sizeof(TYPE) + 15) & -16;
4043 if (VALIST.__args + paddedsize > VALIST.__skip
4044 && VALIST.__args <= VALIST.__skip)
4045 addr = VALIST.__skip + 32;
4047 addr = VALIST.__args;
4048 VALIST.__args = addr + paddedsize;
4049 ret = *(TYPE *)addr;
/* NOTE(review): as elsewhere in this extract, the "args ="/"skip ="
   assignments feeding the COMPONENT_REF builds and the trailing
   argument of the fold_convert call appear to have been dropped.  */
4052 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4053 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4055 tree f_args
, f_skip
;
4057 HOST_WIDE_INT size
, rsize
;
4058 tree paddedsize
, addr
, tmp
;
4059 bool pass_by_reference_p
;
4061 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4062 f_skip
= TREE_CHAIN (f_args
);
4064 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4066 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4068 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4070 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4072 /* if an object is dynamically sized, a pointer to it is passed
4073 instead of the object itself. */
4074 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4076 if (pass_by_reference_p
)
4077 type
= build_pointer_type (type
);
/* rsize = size rounded up to a multiple of UNITS_PER_WORD; this is the
   "paddedsize" of the comment above.  */
4078 size
= int_size_in_bytes (type
);
4079 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4081 /* build conditional expression to calculate addr. The expression
4082 will be gimplified later. */
4083 paddedsize
= size_int (rsize
);
4084 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (args
), paddedsize
);
4085 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4086 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4087 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4088 unshare_expr (skip
)));
/* addr = crossing the __skip boundary ? __skip + 32 : __args.  */
4090 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4091 build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (skip
),
4092 size_int (32)), unshare_expr (args
));
4094 gimplify_assign (addr
, tmp
, pre_p
);
4096 /* update VALIST.__args */
4097 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
4098 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4100 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
/* For pass-by-reference arguments one extra dereference is needed.  */
4103 if (pass_by_reference_p
)
4104 addr
= build_va_arg_indirect_ref (addr
);
4106 return build_va_arg_indirect_ref (addr
);
4109 /* Save parameter registers starting with the register that corresponds
4110 to the first unnamed parameters. If the first unnamed parameter is
4111 in the stack then save no registers. Set pretend_args_size to the
4112 amount of space needed to save the registers. */
/* NOTE(review): the extract dropped the local declarations (ncum,
   offset, regno, tmp), the "if (!no_rtl ...)" guard and the loop
   braces, plus the offset increment inside the loop — confirm against
   the pristine file.  */
4114 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
4115 tree type
, int *pretend_size
, int no_rtl
)
4124 /* cum currently points to the last named argument, we want to
4125 start at the next argument. */
4126 FUNCTION_ARG_ADVANCE (ncum
, mode
, type
, 1);
/* Spill each remaining argument register as a V4SImode quadword into
   the pretend-args area just below the incoming args pointer.  */
4128 offset
= -STACK_POINTER_OFFSET
;
4129 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4131 tmp
= gen_frame_mem (V4SImode
,
4132 plus_constant (virtual_incoming_args_rtx
,
4134 emit_move_insn (tmp
,
4135 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4138 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
/* CONDITIONAL_REGISTER_USAGE worker: when PIC is in effect the PIC
   register must not be allocated for general use, so mark it fixed
   and call-used.  (The flag_pic guard appears to have been dropped by
   the extraction.)  */
4143 spu_conditional_register_usage (void)
4147 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4148 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4152 /* This is called any time we inspect the alignment of a register for
/* addresses.  Returns nonzero when the register is known to be at
   least 128-bit (quadword) aligned.  For hard registers the alignment
   recorded for the original pseudo (ORIGINAL_REGNO) is consulted.  */
4155 reg_aligned_for_addr (rtx x
)
4158 REGNO (x
) < FIRST_PSEUDO_REGISTER
? ORIGINAL_REGNO (x
) : REGNO (x
);
4159 return REGNO_POINTER_ALIGN (regno
) >= 128;
4162 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4163 into its SYMBOL_REF_FLAGS. */
4165 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
/* Delegate the standard flag encoding first, then add the SPU-specific
   under-alignment marker.  */
4167 default_encode_section_info (decl
, rtl
, first
);
4169 /* If a variable has a forced alignment to < 16 bytes, mark it with
4170 SYMBOL_FLAG_ALIGN1. */
4171 if (TREE_CODE (decl
) == VAR_DECL
4172 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4173 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4176 /* Return TRUE if we are certain the mem refers to a complete object
4177 which is both 16-byte aligned and padded to a 16-byte boundary. This
4178 would make it safe to store with a single instruction.
4179 We guarantee the alignment and padding for static objects by aligning
4180 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4181 FIXME: We currently cannot guarantee this for objects on the stack
4182 because assign_parm_setup_stack calls assign_stack_local with the
4183 alignment of the parameter mode and in that case the alignment never
4184 gets adjusted by LOCAL_ALIGNMENT. */
/* NOTE(review): return statements and braces for the early-out cases
   were dropped by the extraction; the visible conditions are only the
   guards.  */
4186 store_with_one_insn_p (rtx mem
)
4188 enum machine_mode mode
= GET_MODE (mem
);
4189 rtx addr
= XEXP (mem
, 0);
4190 if (mode
== BLKmode
)
4192 if (GET_MODE_SIZE (mode
) >= 16)
4194 /* Only static objects. */
4195 if (GET_CODE (addr
) == SYMBOL_REF
)
4197 /* We use the associated declaration to make sure the access is
4198 referring to the whole object.
4199 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4200 if it is necessary. Will there be cases where one exists, and
4201 the other does not? Will there be cases where both exist, but
4202 have different types? */
4203 tree decl
= MEM_EXPR (mem
);
4205 && TREE_CODE (decl
) == VAR_DECL
4206 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4208 decl
= SYMBOL_REF_DECL (addr
);
4210 && TREE_CODE (decl
) == VAR_DECL
4211 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4217 /* Return 1 when the address is not valid for a simple load and store as
4218 required by the '_mov*' patterns. We could make this less strict
4219 for loads, but we prefer mem's to look the same so they are more
4220 likely to be merged. */
/* Sub-quadword accesses need splitting unless the access is word-sized
   or larger AND provably covers a whole aligned, padded object.  */
4222 address_needs_split (rtx mem
)
4224 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4225 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4226 || !(store_with_one_insn_p (mem
)
4227 || mem_is_padded_component_ref (mem
))))
/* Expander worker for the mov<mode> patterns.  Normalizes invalid
   subregs, forces one operand into a register, and dispatches MEM
   operands to spu_split_store / spu_split_load.  Non-zero return
   means the move was fully emitted here.
   NOTE(review): several return statements, braces and a gcc_unreachable
   appear to have been dropped by the extraction.  */
4234 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
4236 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
/* A subreg source that isn't valid on SPU is rewritten as an explicit
   truncate or extend between integer modes.  */
4239 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4241 rtx from
= SUBREG_REG (ops
[1]);
4242 enum machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4244 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4245 && GET_MODE_CLASS (imode
) == MODE_INT
4246 && subreg_lowpart_p (ops
[1]));
4248 if (GET_MODE_SIZE (imode
) < 4)
4250 if (imode
!= GET_MODE (from
))
4251 from
= gen_rtx_SUBREG (imode
, from
, 0);
4253 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4255 enum insn_code icode
= convert_optab_handler (trunc_optab
, mode
, imode
)->insn_code
;
4256 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4259 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4263 /* At least one of the operands needs to be a register. */
4264 if ((reload_in_progress
| reload_completed
) == 0
4265 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4267 rtx temp
= force_reg (mode
, ops
[1]);
4268 emit_move_insn (ops
[0], temp
);
4271 if (reload_in_progress
|| reload_completed
)
4273 if (CONSTANT_P (ops
[1]))
4274 return spu_split_immediate (ops
);
4278 /* Catch the SImode immediates greater than 0x7fffffff, and sign
/* extend them (truncate to the mode, then re-emit if different).  */
4280 if (GET_CODE (ops
[1]) == CONST_INT
)
4282 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4283 if (val
!= INTVAL (ops
[1]))
4285 emit_move_insn (ops
[0], GEN_INT (val
));
/* MEM destination -> store splitter; MEM source -> load splitter.  */
4290 return spu_split_store (ops
);
4292 return spu_split_load (ops
);
/* Move the scalar value held in the preferred slot of TImode SRC into
   DST (a narrower scalar mode).  Implemented as a logical right shift
   of SRC so the value lands in the low part, then a TRUNCATE; a
   subreg copy finishes up when DST's mode is not an integer mode of
   the same width.  */
4298 spu_convert_move (rtx dst
, rtx src
)
4300 enum machine_mode mode
= GET_MODE (dst
);
4301 enum machine_mode int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
4303 gcc_assert (GET_MODE (src
) == TImode
);
4304 reg
= int_mode
!= mode
? gen_reg_rtx (int_mode
) : dst
;
/* Shift by 64 for DImode, 96 for SImode-and-narrower, so the scalar
   ends up right-justified in the quadword.  */
4305 emit_insn (gen_rtx_SET (VOIDmode
, reg
,
4306 gen_rtx_TRUNCATE (int_mode
,
4307 gen_rtx_LSHIFTRT (TImode
, src
,
4308 GEN_INT (int_mode
== DImode
? 64 : 96)))));
4309 if (int_mode
!= mode
)
4311 reg
= simplify_gen_subreg (mode
, reg
, int_mode
, 0);
4312 emit_move_insn (dst
, reg
);
4316 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4317 the address from SRC and SRC+16. Return a REG or CONST_INT that
4318 specifies how many bytes to rotate the loaded registers, plus any
4319 extra from EXTRA_ROTQBY. The address and rotate amounts are
4320 normalized to improve merging of loads and rotate computations. */
/* NOTE(review): local declarations (rot_amt etc.), braces and several
   assignments were dropped by the extraction; the case analysis below
   matches the address-form table in the comment at 4336-4343.  */
4322 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4324 rtx addr
= XEXP (src
, 0);
4325 rtx p0
, p1
, rot
, addr0
, addr1
;
4331 if (MEM_ALIGN (src
) >= 128)
4332 /* Address is already aligned; simply perform a TImode load. */ ;
4333 else if (GET_CODE (addr
) == PLUS
)
/* Instruction selection per operand alignment:  */
4336 aligned reg + aligned reg => lqx
4337 aligned reg + unaligned reg => lqx, rotqby
4338 aligned reg + aligned const => lqd
4339 aligned reg + unaligned const => lqd, rotqbyi
4340 unaligned reg + aligned reg => lqx, rotqby
4341 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4342 unaligned reg + aligned const => lqd, rotqby
4343 unaligned reg + unaligned const -> not allowed by legitimate address
4345 p0
= XEXP (addr
, 0);
4346 p1
= XEXP (addr
, 1);
4347 if (!reg_aligned_for_addr (p0
))
4349 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
/* Both terms unaligned: materialize the full rotate amount p0+p1.  */
4351 rot
= gen_reg_rtx (SImode
);
4352 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4354 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4358 && INTVAL (p1
) * BITS_PER_UNIT
4359 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4361 rot
= gen_reg_rtx (SImode
);
4362 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4367 rtx x
= gen_reg_rtx (SImode
);
4368 emit_move_insn (x
, p1
);
4369 if (!spu_arith_operand (p1
, SImode
))
4371 rot
= gen_reg_rtx (SImode
);
4372 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4373 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
/* p0 aligned: any misaligned constant offset contributes only a
   byte rotate; strip it down to a 16-byte-aligned displacement.  */
4381 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4383 rot_amt
= INTVAL (p1
) & 15;
4384 if (INTVAL (p1
) & -16)
4386 p1
= GEN_INT (INTVAL (p1
) & -16);
4387 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4392 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4396 else if (REG_P (addr
))
4398 if (!reg_aligned_for_addr (addr
))
4401 else if (GET_CODE (addr
) == CONST
)
4403 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4404 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4405 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
/* aligned symbol + const: keep the aligned part of the offset in the
   address, fold the low 4 bits into the rotate amount.  */
4407 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4409 addr
= gen_rtx_CONST (Pmode
,
4410 gen_rtx_PLUS (Pmode
,
4411 XEXP (XEXP (addr
, 0), 0),
4412 GEN_INT (rot_amt
& -16)));
4414 addr
= XEXP (XEXP (addr
, 0), 0);
4418 rot
= gen_reg_rtx (Pmode
);
4419 emit_move_insn (rot
, addr
);
4422 else if (GET_CODE (addr
) == CONST_INT
)
4424 rot_amt
= INTVAL (addr
);
4425 addr
= GEN_INT (rot_amt
& -16);
4427 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4429 rot
= gen_reg_rtx (Pmode
);
4430 emit_move_insn (rot
, addr
);
/* Fold the caller-requested extra rotate into the total.  */
4433 rot_amt
+= extra_rotby
;
4439 rtx x
= gen_reg_rtx (SImode
);
4440 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4444 if (!rot
&& rot_amt
)
4445 rot
= GEN_INT (rot_amt
);
/* Emit the quadword load(s) from the 16-byte-aligned address(es).  */
4447 addr0
= copy_rtx (addr
);
4448 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4449 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4453 addr1
= plus_constant (copy_rtx (addr
), 16);
4454 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4455 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
/* Split a sub-quadword load ops[0] = MEM ops[1] into a TImode load
   plus a byte rotate that moves the value into the preferred slot,
   finishing with spu_convert_move.  Returns nonzero when insns were
   emitted here (early-out paths' return statements were dropped by
   the extraction).  */
4462 spu_split_load (rtx
* ops
)
4464 enum machine_mode mode
= GET_MODE (ops
[0]);
4465 rtx addr
, load
, rot
;
4468 if (GET_MODE_SIZE (mode
) >= 16)
4471 addr
= XEXP (ops
[1], 0);
4472 gcc_assert (GET_CODE (addr
) != AND
);
4474 if (!address_needs_split (ops
[1]))
/* Simple case: address is fine as-is; do a quadword load and extract.  */
4476 ops
[1] = change_address (ops
[1], TImode
, addr
);
4477 load
= gen_reg_rtx (TImode
);
4478 emit_insn (gen__movti (load
, ops
[1]));
4479 spu_convert_move (ops
[0], load
);
/* Values narrower than a word sit left of the preferred slot; bias the
   rotate so they end up word-aligned.  */
4483 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4485 load
= gen_reg_rtx (TImode
);
4486 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4489 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4491 spu_convert_move (ops
[0], load
);
/* Split a sub-quadword store MEM ops[0] = ops[1] into the SPU's
   read-modify-write sequence: load the containing quadword, build a
   cpat insertion mask, shufb the new value in, and store the quadword
   back.  Returns nonzero when insns were emitted here.
   NOTE(review): local declarations (reg, scalar, aform-related
   variables), several braces and return statements were dropped by
   the extraction.  */
4496 spu_split_store (rtx
* ops
)
4498 enum machine_mode mode
= GET_MODE (ops
[0]);
4500 rtx addr
, p0
, p1
, p1_lo
, smem
;
4504 if (GET_MODE_SIZE (mode
) >= 16)
4507 addr
= XEXP (ops
[0], 0);
4508 gcc_assert (GET_CODE (addr
) != AND
);
4510 if (!address_needs_split (ops
[0]))
/* Whole-object store: widen to TImode and store directly.  */
4512 reg
= gen_reg_rtx (TImode
);
4513 emit_insn (gen_spu_convert (reg
, ops
[1]));
4514 ops
[0] = change_address (ops
[0], TImode
, addr
);
4515 emit_move_insn (ops
[0], reg
);
4519 if (GET_CODE (addr
) == PLUS
)
/* Instruction selection per operand alignment ("c?d"/"c?x" are the
   cbd/chd/cwd/cdd and cbx/chx/cwx/cdx generate-insertion-mask forms):  */
4522 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4523 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4524 aligned reg + aligned const => lqd, c?d, shuf, stqx
4525 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4526 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4527 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4528 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4529 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4532 p0
= XEXP (addr
, 0);
4533 p1
= p1_lo
= XEXP (addr
, 1);
4534 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4536 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4537 if (reg_aligned_for_addr (p0
))
4539 p1
= GEN_INT (INTVAL (p1
) & -16);
4540 if (p1
== const0_rtx
)
4543 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4547 rtx x
= gen_reg_rtx (SImode
);
4548 emit_move_insn (x
, p1
);
4549 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4553 else if (REG_P (addr
))
4557 p1
= p1_lo
= const0_rtx
;
/* Absolute-form (aform) addresses key the mask off the stack pointer
   register, which is known quadword aligned.  */
4562 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4563 p1
= 0; /* aform doesn't use p1 */
4565 if (ALIGNED_SYMBOL_REF_P (addr
))
4567 else if (GET_CODE (addr
) == CONST
4568 && GET_CODE (XEXP (addr
, 0)) == PLUS
4569 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4570 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4572 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4574 addr
= gen_rtx_CONST (Pmode
,
4575 gen_rtx_PLUS (Pmode
,
4576 XEXP (XEXP (addr
, 0), 0),
4577 GEN_INT (v
& -16)));
4579 addr
= XEXP (XEXP (addr
, 0), 0);
4580 p1_lo
= GEN_INT (v
& 15);
4582 else if (GET_CODE (addr
) == CONST_INT
)
4584 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4585 addr
= GEN_INT (INTVAL (addr
) & -16);
4589 p1_lo
= gen_reg_rtx (SImode
);
4590 emit_move_insn (p1_lo
, addr
);
4594 reg
= gen_reg_rtx (TImode
);
4596 scalar
= store_with_one_insn_p (ops
[0]);
4599 /* We could copy the flags from the ops[0] MEM to mem here,
4600 We don't because we want this load to be optimized away if
4601 possible, and copying the flags will prevent that in certain
4602 cases, e.g. consider the volatile flag. */
4604 rtx pat
= gen_reg_rtx (TImode
);
4605 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4606 set_mem_alias_set (lmem
, 0);
4607 emit_insn (gen_movti (reg
, lmem
));
4609 if (!p0
|| reg_aligned_for_addr (p0
))
4610 p0
= stack_pointer_rtx
;
/* Build the byte-insertion mask and merge the new value in.  */
4614 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4615 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4619 if (GET_CODE (ops
[1]) == REG
)
4620 emit_insn (gen_spu_convert (reg
, ops
[1]));
4621 else if (GET_CODE (ops
[1]) == SUBREG
)
4622 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
/* Scalars narrower than a word need left-justifying in their slot.  */
4627 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4628 emit_insn (gen_ashlti3
4629 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
4631 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4632 /* We can't use the previous alias set because the memory has changed
4633 size and can potentially overlap objects of other types. */
4634 set_mem_alias_set (smem
, 0);
4636 emit_insn (gen_movti (smem
, reg
));
4640 /* Return TRUE if X is MEM which is a struct member reference
4641 and the member can safely be loaded and stored with a single
4642 instruction because it is padded. */
/* NOTE(review): return statements and the TREE_CHAIN advance before
   the final FIELD_DECL check were dropped by the extraction.  */
4644 mem_is_padded_component_ref (rtx x
)
4646 tree t
= MEM_EXPR (x
);
4648 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
4650 t
= TREE_OPERAND (t
, 1);
/* Require a 16-byte-aligned, non-aggregate FIELD_DECL.  */
4651 if (!t
|| TREE_CODE (t
) != FIELD_DECL
4652 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
4654 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4655 r
= DECL_FIELD_CONTEXT (t
);
4656 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
4658 /* Make sure they are the same mode */
4659 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
4661 /* If there are no following fields then the field alignment assures
4662 the structure is padded to the alignment which means this field is
4664 if (TREE_CHAIN (t
) == 0)
4666 /* If the following field is also aligned then this field will be
4669 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
4674 /* Parse the -mfixed-range= option string. */
/* NOTE(review): the loop over comma-separated ranges, the swap of
   first/last, and the early returns after warnings were dropped by
   the extraction.  */
4676 fix_range (const char *const_str
)
4679 char *str
, *dash
, *comma
;
4681 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4682 REG2 are either register names or register numbers. The effect
4683 of this option is to mark the registers in the range from REG1 to
4684 REG2 as ``fixed'' so they won't be used by the compiler. */
/* Work on a stack copy so the string can be split in place.  */
4686 i
= strlen (const_str
);
4687 str
= (char *) alloca (i
+ 1);
4688 memcpy (str
, const_str
, i
+ 1);
4692 dash
= strchr (str
, '-');
4695 warning (0, "value of -mfixed-range must have form REG1-REG2");
4699 comma
= strchr (dash
+ 1, ',');
4703 first
= decode_reg_name (str
);
4706 warning (0, "unknown register name: %s", str
);
4710 last
= decode_reg_name (dash
+ 1);
4713 warning (0, "unknown register name: %s", dash
+ 1);
4721 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
/* Mark the whole range fixed and call-used.  */
4725 for (i
= first
; i
<= last
; ++i
)
4726 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4736 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4737 can be generated using the fsmbi instruction. */
4739 fsmbi_const_p (rtx x
)
4743 /* We can always choose TImode for CONST_INT because the high bits
4744 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4745 enum immediate_class c
= classify_immediate (x
, TImode
);
/* IC_FSMBI2 (two-instruction form) is only acceptable before the
   epilogue is laid out.  */
4746 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
4751 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4752 can be generated using the cbd, chd, cwd or cdd instruction. */
4754 cpat_const_p (rtx x
, enum machine_mode mode
)
4758 enum immediate_class c
= classify_immediate (x
, mode
);
4759 return c
== IC_CPAT
;
/* Build the TImode CONST_VECTOR that a cbd/chd/cwd/cdd instruction
   would produce for operands ops[1] (base, REG or CONST_INT),
   ops[2] (offset) and ops[3] (element size).  Returns 0 (dropped
   return statements) when the pattern cannot be computed at compile
   time, e.g. a base register not known to be 16-byte aligned.
   NOTE(review): the default dst[] initialization loop body and the
   shift computation for the isize cases were dropped by the
   extraction.  */
4765 gen_cpat_const (rtx
* ops
)
4767 unsigned char dst
[16];
4768 int i
, offset
, shift
, isize
;
4769 if (GET_CODE (ops
[3]) != CONST_INT
4770 || GET_CODE (ops
[2]) != CONST_INT
4771 || (GET_CODE (ops
[1]) != CONST_INT
4772 && GET_CODE (ops
[1]) != REG
))
/* A register base must be a pointer with known quadword alignment.  */
4774 if (GET_CODE (ops
[1]) == REG
4775 && (!REG_POINTER (ops
[1])
4776 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
4779 for (i
= 0; i
< 16; i
++)
4781 isize
= INTVAL (ops
[3]);
4784 else if (isize
== 2)
/* Insertion position = (offset + constant base) mod 16.  */
4788 offset
= (INTVAL (ops
[2]) +
4789 (GET_CODE (ops
[1]) ==
4790 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
4791 for (i
= 0; i
< isize
; i
++)
4792 dst
[offset
+ i
] = i
+ shift
;
4793 return array_to_constant (TImode
, dst
);
4796 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4797 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4798 than 16 bytes, the value is repeated across the rest of the array. */
/* NOTE(review): local declarations (val, i, j, first, units, elt) and
   some loop-body lines (val >>= 8; splat copy; gcc_unreachable) were
   dropped by the extraction.  Bytes are emitted big-endian (most
   significant byte at the lowest index), matching SPU register
   layout.  */
4800 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
4805 memset (arr
, 0, 16);
4806 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
4807 if (GET_CODE (x
) == CONST_INT
4808 || (GET_CODE (x
) == CONST_DOUBLE
4809 && (mode
== SFmode
|| mode
== DFmode
)))
4811 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
4813 if (GET_CODE (x
) == CONST_DOUBLE
)
4814 val
= const_double_to_hwint (x
);
/* Scalar: write GET_MODE_SIZE bytes from least significant upward.  */
4817 first
= GET_MODE_SIZE (mode
) - 1;
4818 for (i
= first
; i
>= 0; i
--)
4820 arr
[i
] = val
& 0xff;
4823 /* Splat the constant across the whole array. */
4824 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
4827 j
= (j
== first
) ? 0 : j
+ 1;
/* Non-float CONST_DOUBLE: a full 128-bit integer in high/low halves.  */
4830 else if (GET_CODE (x
) == CONST_DOUBLE
)
4832 val
= CONST_DOUBLE_LOW (x
);
4833 for (i
= 15; i
>= 8; i
--)
4835 arr
[i
] = val
& 0xff;
4838 val
= CONST_DOUBLE_HIGH (x
);
4839 for (i
= 7; i
>= 0; i
--)
4841 arr
[i
] = val
& 0xff;
/* Vector: serialize each element at its natural byte offset.  */
4845 else if (GET_CODE (x
) == CONST_VECTOR
)
4849 mode
= GET_MODE_INNER (mode
);
4850 units
= CONST_VECTOR_NUNITS (x
);
4851 for (i
= 0; i
< units
; i
++)
4853 elt
= CONST_VECTOR_ELT (x
, i
);
4854 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
4856 if (GET_CODE (elt
) == CONST_DOUBLE
)
4857 val
= const_double_to_hwint (elt
);
4860 first
= GET_MODE_SIZE (mode
) - 1;
4861 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
4863 for (j
= first
; j
>= 0; j
--)
4865 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
4875 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4876 smaller than 16 bytes, use the bytes that would represent that value
4877 in a register, e.g., for QImode return the value of arr[3]. */
/* Inverse of constant_to_array.  NOTE(review): declarations of val/high,
   the TImode/SFmode/DFmode case labels and some braces were dropped
   by the extraction; the per-mode sections below are recognizable by
   their byte ranges.  */
4879 array_to_constant (enum machine_mode mode
, unsigned char arr
[16])
4881 enum machine_mode inner_mode
;
4883 int units
, size
, i
, j
, k
;
/* Integer modes up to HOST_WIDE_INT: read from the preferred slot
   (bytes 0..3 hold sub-word scalars right-justified).  */
4886 if (GET_MODE_CLASS (mode
) == MODE_INT
4887 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
4889 j
= GET_MODE_SIZE (mode
);
4890 i
= j
< 4 ? 4 - j
: 0;
4891 for (val
= 0; i
< j
; i
++)
4892 val
= (val
<< 8) | arr
[i
];
4893 val
= trunc_int_for_mode (val
, mode
);
4894 return GEN_INT (val
);
/* TImode: both 64-bit halves.  */
4900 for (i
= high
= 0; i
< 8; i
++)
4901 high
= (high
<< 8) | arr
[i
];
4902 for (i
= 8, val
= 0; i
< 16; i
++)
4903 val
= (val
<< 8) | arr
[i
];
4904 return immed_double_const (val
, high
, TImode
);
/* SFmode from the first word.  */
4908 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
4909 val
= trunc_int_for_mode (val
, SImode
);
4910 return hwint_to_const_double (SFmode
, val
);
/* DFmode from the first doubleword.  */
4914 for (i
= 0, val
= 0; i
< 8; i
++)
4915 val
= (val
<< 8) | arr
[i
];
4916 return hwint_to_const_double (DFmode
, val
);
4919 if (!VECTOR_MODE_P (mode
))
/* Vector: rebuild one element per size-byte chunk.  */
4922 units
= GET_MODE_NUNITS (mode
);
4923 size
= GET_MODE_UNIT_SIZE (mode
);
4924 inner_mode
= GET_MODE_INNER (mode
);
4925 v
= rtvec_alloc (units
);
4927 for (k
= i
= 0; i
< units
; ++i
)
4930 for (j
= 0; j
< size
; j
++, k
++)
4931 val
= (val
<< 8) | arr
[k
];
4933 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
4934 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
4936 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
4941 return gen_rtx_CONST_VECTOR (mode
, v
);
/* Emit a warning or error (per -mwarn-reloc / -merror-reloc) when a
   PIC build would need a run-time relocation for X, trying to point
   the diagnostic at a source location for the offending decl.
   NOTE(review): the "tree decl = 0;" initialization, location_t loc
   declaration, the warning_at/error_at calls with loc, and the decl=0
   reset were dropped by the extraction.  */
4945 reloc_diagnostic (rtx x
)
4948 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
/* Dig the decl out of a plain SYMBOL_REF or a (const (plus sym n)).  */
4951 if (GET_CODE (x
) == SYMBOL_REF
)
4952 decl
= SYMBOL_REF_DECL (x
);
4953 else if (GET_CODE (x
) == CONST
4954 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
4955 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
4957 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4958 if (decl
&& !DECL_P (decl
))
4961 /* The decl could be a string constant. */
4962 if (decl
&& DECL_P (decl
))
4965 /* We use last_assemble_variable_decl to get line information. It's
4966 not always going to be right and might not even be close, but will
4967 be right for the more common cases. */
4968 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
4969 loc
= DECL_SOURCE_LOCATION (decl
);
4971 loc
= DECL_SOURCE_LOCATION (last_assemble_variable_decl
);
4973 if (TARGET_WARN_RELOC
)
4975 "creating run-time relocation for %qD", decl
);
4978 "creating run-time relocation for %qD", decl
);
/* No decl available: diagnose at the current input location.  */
4982 if (TARGET_WARN_RELOC
)
4983 warning_at (input_location
, 0, "creating run-time relocation");
4985 error_at (input_location
, "creating run-time relocation");
4989 /* Hook into assemble_integer so we can generate an error for run-time
4990 relocations. The SPU ABI disallows them. */
4992 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
4994 /* By default run-time relocations aren't supported, but we allow them
4995 in case users support it in their own run-time loader. And we provide
4996 a warning for those users that don't. */
4997 if ((GET_CODE (x
) == SYMBOL_REF
)
4998 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
4999 reloc_diagnostic (x
);
/* Then emit the value the standard way.  */
5001 return default_assemble_integer (x
, size
, aligned_p
);
/* ASM_OUTPUT-style hook: emit a ".global NAME" directive for NAME.
   (The trailing fputs of the newline was dropped by the extraction.)  */
5005 spu_asm_globalize_label (FILE * file
, const char *name
)
5007 fputs ("\t.global\t", file
);
5008 assemble_name (file
, name
);
/* TARGET_RTX_COSTS worker: estimate the cost of X in instructions.
   NOTE(review): the switch statement itself, its case labels (MULT,
   DIV, shifts, etc.), break statements and braces were dropped by the
   extraction — only the *total assignments and a few guards remain
   visible.  */
5013 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
,
5014 bool speed ATTRIBUTE_UNUSED
)
5016 enum machine_mode mode
= GET_MODE (x
);
5017 int cost
= COSTS_N_INSNS (2);
5019 /* Folding to a CONST_VECTOR will use extra space but there might
5020 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5021 only if it allows us to fold away multiple insns. Changing the cost
5022 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5023 because this cost will only be compared against a single insn.
5024 if (code == CONST_VECTOR)
5025 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5028 /* Use defaults for float operations. Not accurate but good enough. */
5031 *total
= COSTS_N_INSNS (13);
5036 *total
= COSTS_N_INSNS (6);
/* Constants loadable with a single il/ila/ilhu+iohl sequence.  */
5042 if (satisfies_constraint_K (x
))
5044 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5045 *total
= COSTS_N_INSNS (1);
5047 *total
= COSTS_N_INSNS (3);
5051 *total
= COSTS_N_INSNS (3);
5056 *total
= COSTS_N_INSNS (0);
5060 *total
= COSTS_N_INSNS (5);
5064 case FLOAT_TRUNCATE
:
5066 case UNSIGNED_FLOAT
:
5069 *total
= COSTS_N_INSNS (7);
5075 *total
= COSTS_N_INSNS (9);
/* Multiply: cheaper when one operand is a suitable constant.  */
5082 GET_CODE (XEXP (x
, 0)) ==
5083 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5084 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5086 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5088 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5089 cost
= COSTS_N_INSNS (14);
5090 if ((val
& 0xffff) == 0)
5091 cost
= COSTS_N_INSNS (9);
5092 else if (val
> 0 && val
< 0x10000)
5093 cost
= COSTS_N_INSNS (11);
5102 *total
= COSTS_N_INSNS (20);
5109 *total
= COSTS_N_INSNS (4);
5112 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5113 *total
= COSTS_N_INSNS (0);
5115 *total
= COSTS_N_INSNS (4);
5118 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5119 if (GET_MODE_CLASS (mode
) == MODE_INT
5120 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5121 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5122 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
/* TARGET_UNWIND_WORD_MODE hook.  NOTE(review): the body (a single
   return of the unwind word mode, presumably SImode) was dropped by
   the extraction — confirm against the pristine file.  */
5127 static enum machine_mode
5128 spu_unwind_word_mode (void)
5133 /* Decide whether we can make a sibling call to a function. DECL is the
5134 declaration of the function being targeted by the call and EXP is the
5135 CALL_EXPR representing the call. */
/* Sibcalls require a known callee (direct call) and are disabled under
   -mlarge-mem where the branch range is insufficient.  */
5137 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5139 return decl
&& !TARGET_LARGE_MEM
;
5142 /* We need to correctly update the back chain pointer and the Available
5143 Stack Size (which is in the second slot of the sp register.) */
5145 spu_allocate_stack (rtx op0
, rtx op1
)
/* NOTE(review): the HOST_WIDE_INT v declaration was dropped by the
   extraction.  */
5148 rtx chain
= gen_reg_rtx (V4SImode
);
5149 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5150 rtx sp
= gen_reg_rtx (V4SImode
);
5151 rtx splatted
= gen_reg_rtx (V4SImode
);
5152 rtx pat
= gen_reg_rtx (TImode
);
5154 /* copy the back chain so we can save it back again. */
5155 emit_move_insn (chain
, stack_bot
);
5157 op1
= force_reg (SImode
, op1
);
/* Shuffle pattern 0x00010203 repeated: splat the SImode size across
   all four V4SI slots so it can be subtracted from both the sp and
   the available-stack-size word at once.  */
5159 v
= 0x1020300010203ll
;
5160 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5161 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5163 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5164 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
/* -fstack-check: halt (heq) if the available stack size went negative.  */
5166 if (flag_stack_check
)
5168 rtx avail
= gen_reg_rtx(SImode
);
5169 rtx result
= gen_reg_rtx(SImode
);
5170 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
5171 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5172 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5175 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
/* Re-store the saved back chain at the new stack bottom.  */
5177 emit_move_insn (stack_bot
, chain
);
5179 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
/* Restore the stack pointer (and back chain) from a save area OP1,
   recomputing the Available Stack Size word (slot 1 of $sp) that a
   plain pointer restore would leave stale.  The arr[] shuffle pattern
   broadcasts an SImode value to all four V4SI slots.  */
5183 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5185 static unsigned char arr
[16] =
5186 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5187 rtx temp
= gen_reg_rtx (SImode
);
5188 rtx temp2
= gen_reg_rtx (SImode
);
5189 rtx temp3
= gen_reg_rtx (V4SImode
);
5190 rtx temp4
= gen_reg_rtx (V4SImode
);
5191 rtx pat
= gen_reg_rtx (TImode
);
5192 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5194 /* Restore the backchain from the first word, sp from the second. */
5195 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5196 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5198 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5200 /* Compute Available Stack Size for sp */
5201 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5202 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5204 /* Compute Available Stack Size for back chain */
5205 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5206 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5207 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
/* Commit: adjust $sp vector-wide, then write the back chain quadword.  */
5209 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5210 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
/* TARGET_INIT_LIBFUNCS hook: register libgcc routine names for the
   DImode and TImode integer operations the SPU has no hardware for,
   plus unsigned int->double conversions.  */
5214 spu_init_libfuncs (void)
5216 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
5217 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
5218 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
5219 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
5220 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
5221 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
5222 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
5223 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
5224 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
5225 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
5226 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
5228 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
5229 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
5231 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
5232 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
5233 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
5234 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
5235 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
5236 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
5239 /* Make a subreg, stripping any existing subreg. We could possibly just
5240 call simplify_subreg, but in this case we know what we want. */
/* NOTE(review): the strip ("x = SUBREG_REG (x);") and same-mode
   ("return x;") lines were dropped by the extraction.  */
5242 spu_gen_subreg (enum machine_mode mode
, rtx x
)
5244 if (GET_CODE (x
) == SUBREG
)
5246 if (GET_MODE (x
) == mode
)
5248 return gen_rtx_SUBREG (mode
, x
, 0);
/* TARGET_RETURN_IN_MEMORY hook: force a memory return for BLKmode
   values, non-constant-size types, and anything too big for the
   register-return window (MAX_REGISTER_RETURN words).  */
5252 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5254 return (TYPE_MODE (type
) == BLKmode
5256 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5257 || int_size_in_bytes (type
) >
5258 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
/* NOTE(review): extraction-garbled span -- original lines are split and
   several structural lines (enum braces, #undef DEF_BUILTIN, the array
   terminator) are missing; gaps in the embedded line numbers mark the
   holes.  Kept byte-identical; restore from upstream spu.c.
   Purpose: expand the DEF_BUILTIN X-macro in spu-builtins.def twice --
   once to build the spu_function_code enum, once to build the
   spu_builtins descriptor table.  */
5261 /* Create the built-in types and functions */
5263 enum spu_function_code
5265 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5266 #include "spu-builtins.def"
5271 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5273 struct spu_builtin_description spu_builtins
[] = {
5274 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5275 {fcode, icode, name, type, params, NULL_TREE},
5276 #include "spu-builtins.def"
/* NOTE(review): extraction-garbled span -- statements are split across
   physical lines and several source lines are missing (function header,
   braces, parts of the prototype-building loop).  Kept byte-identical;
   recover the full text from upstream gcc/config/spu/spu.c.
   Purpose, grounded in the visible calls: build the vector type nodes
   (build_vector_type), populate spu_builtin_types[] from global_trees,
   then for every spu_builtins entry construct a prototype with
   tree_cons/build_function_type and register it with
   add_builtin_function; SPU_MASK_FOR_LOAD is marked TREE_READONLY and
   every builtin is marked TREE_NOTHROW.  */
5281 spu_init_builtins (void)
5283 struct spu_builtin_description
*d
;
5286 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5287 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5288 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5289 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5290 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5291 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5293 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5294 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5295 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5296 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5298 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5300 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5301 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5302 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5303 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5304 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5305 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5306 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5307 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5308 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5309 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5310 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5311 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5313 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5314 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5315 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5316 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5317 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5318 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5319 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5320 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5322 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5323 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5325 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5327 spu_builtin_types
[SPU_BTI_PTR
] =
5328 build_pointer_type (build_qualified_type
5330 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5332 /* For each builtin we build a new prototype. The tree code will make
5333 sure nodes are shared. */
5334 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5337 char name
[64]; /* build_function will make a copy. */
5343 /* Find last parm. */
5344 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5349 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5351 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5353 sprintf (name
, "__builtin_%s", d
->name
);
5355 add_builtin_function (name
, p
, END_BUILTINS
+ i
, BUILT_IN_MD
,
5357 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5358 TREE_READONLY (d
->fndecl
) = 1;
5360 /* These builtins don't throw. */
5361 TREE_NOTHROW (d
->fndecl
) = 1;
5366 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5368 static unsigned char arr
[16] =
5369 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5371 rtx temp
= gen_reg_rtx (Pmode
);
5372 rtx temp2
= gen_reg_rtx (V4SImode
);
5373 rtx temp3
= gen_reg_rtx (V4SImode
);
5374 rtx pat
= gen_reg_rtx (TImode
);
5375 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5377 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5379 /* Restore the sp. */
5380 emit_move_insn (temp
, op1
);
5381 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5383 /* Compute available stack size for sp. */
5384 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5385 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5387 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5388 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
5392 spu_safe_dma (HOST_WIDE_INT channel
)
5394 return TARGET_SAFE_DMA
&& channel
>= 21 && channel
<= 27;
/* NOTE(review): extraction-garbled span -- the switch/case skeleton that
   selects the per-mode shuffle constant is missing (only the
   immed_double_const payload lines survive).  Kept byte-identical;
   restore from upstream spu.c.
   Purpose: expand a splat of scalar ops[1] into vector ops[0] -- a
   constant is materialized directly via array_to_constant, otherwise
   the scalar is forced to a register and replicated with shufb using a
   mode-dependent shuffle pattern.  */
5398 spu_builtin_splats (rtx ops
[])
5400 enum machine_mode mode
= GET_MODE (ops
[0]);
5401 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5403 unsigned char arr
[16];
5404 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5405 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5409 rtx reg
= gen_reg_rtx (TImode
);
5411 if (GET_CODE (ops
[1]) != REG
5412 && GET_CODE (ops
[1]) != SUBREG
)
5413 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5419 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5425 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5430 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5435 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5441 emit_move_insn (reg
, shuf
);
5442 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
/* NOTE(review): extraction-garbled span -- the case labels of both
   switches (on the vector mode) are missing; only the emitted-insn
   lines survive.  Kept byte-identical; restore from upstream spu.c.
   Purpose: expand extraction of element ops[2] from vector ops[1] into
   ops[0].  A constant index maps straight to a vec_extract pattern for
   the mode; a variable index computes a byte offset into tmp, rotates
   the quadword with rotqby_ti, and converts the low slot to the scalar
   mode via spu_convert.  */
5447 spu_builtin_extract (rtx ops
[])
5449 enum machine_mode mode
;
5452 mode
= GET_MODE (ops
[1]);
5454 if (GET_CODE (ops
[2]) == CONST_INT
)
5459 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5462 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5465 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5468 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5471 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5474 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5482 from
= spu_gen_subreg (TImode
, ops
[1]);
5483 rot
= gen_reg_rtx (TImode
);
5484 tmp
= gen_reg_rtx (SImode
);
5489 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5492 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5493 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5497 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5501 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5506 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5508 emit_insn (gen_spu_convert (ops
[0], rot
));
/* NOTE(review): extraction-garbled span -- the line opening the
   mask-generating emit_insn call (original line 5527, presumably
   gen_cpat -- TODO confirm against upstream) is missing, as are the
   offset declaration and else keyword.  Kept byte-identical.
   Purpose: insert scalar ops[2] into element ops[3] of vector ops[1],
   producing ops[0]: compute the byte offset of the element, generate an
   insertion control mask, and merge with shufb.  */
5512 spu_builtin_insert (rtx ops
[])
5514 enum machine_mode mode
= GET_MODE (ops
[0]);
5515 enum machine_mode imode
= GET_MODE_INNER (mode
);
5516 rtx mask
= gen_reg_rtx (TImode
);
5519 if (GET_CODE (ops
[3]) == CONST_INT
)
5520 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5523 offset
= gen_reg_rtx (SImode
);
5524 emit_insn (gen_mulsi3
5525 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5528 (mask
, stack_pointer_rtx
, offset
,
5529 GEN_INT (GET_MODE_SIZE (imode
))));
5530 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
/* NOTE(review): extraction-garbled span -- the case labels of the
   mode switch and the declaration of `pos' are missing.  Kept
   byte-identical; restore from upstream spu.c.
   Purpose: place scalar element ops[2] of ops[1] into the preferred
   slot of vector ops[0]: convert the scalar into a TImode quadword,
   compute a byte rotation amount (constant path computes pos & 15;
   variable path derives it per element size), then rotqby_ti into the
   destination subreg.  */
5534 spu_builtin_promote (rtx ops
[])
5536 enum machine_mode mode
, imode
;
5537 rtx rot
, from
, offset
;
5540 mode
= GET_MODE (ops
[0]);
5541 imode
= GET_MODE_INNER (mode
);
5543 from
= gen_reg_rtx (TImode
);
5544 rot
= spu_gen_subreg (TImode
, ops
[0]);
5546 emit_insn (gen_spu_convert (from
, ops
[1]));
5548 if (GET_CODE (ops
[2]) == CONST_INT
)
5550 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5551 if (GET_MODE_SIZE (imode
) < 4)
5552 pos
+= 4 - GET_MODE_SIZE (imode
);
5553 offset
= GEN_INT (pos
& 15);
5557 offset
= gen_reg_rtx (SImode
);
5561 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5564 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5565 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5569 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5570 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5574 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5580 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
/* NOTE(review): extraction-garbled span -- alternating rows of the
   instruction-template arrays (original lines 5605, 5607, 5632-5636)
   and several declarations are missing.  Kept byte-identical; restore
   from upstream spu.c.
   Purpose: emit code that fills in a trampoline at TRAMP so it loads
   CXT into STATIC_CHAIN_REGNUM and branches to FNADDR.  The
   TARGET_LARGE_MEM path builds full 32-bit ilhu/iohl immediates by
   rotating and masking the shuffled address words; the small-memory
   path or's the shifted CXT/FNADDR into ila/br templates (valid only
   when the values fit 18 bits, per the in-code comment).  Ends with a
   sync to flush the instruction cache line.  */
5584 spu_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
5586 rtx shuf
= gen_reg_rtx (V4SImode
);
5587 rtx insn
= gen_reg_rtx (V4SImode
);
5592 fnaddr
= force_reg (SImode
, fnaddr
);
5593 cxt
= force_reg (SImode
, cxt
);
5595 if (TARGET_LARGE_MEM
)
5597 rtx rotl
= gen_reg_rtx (V4SImode
);
5598 rtx mask
= gen_reg_rtx (V4SImode
);
5599 rtx bi
= gen_reg_rtx (SImode
);
5600 unsigned char shufa
[16] = {
5601 2, 3, 0, 1, 18, 19, 16, 17,
5602 0, 1, 2, 3, 16, 17, 18, 19
5604 unsigned char insna
[16] = {
5606 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5608 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5611 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5612 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5614 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5615 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5616 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5617 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5619 mem
= memory_address (Pmode
, tramp
);
5620 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
5622 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5623 mem
= memory_address (Pmode
, plus_constant (tramp
, 16));
5624 emit_move_insn (gen_rtx_MEM (Pmode
, mem
), bi
);
5628 rtx scxt
= gen_reg_rtx (SImode
);
5629 rtx sfnaddr
= gen_reg_rtx (SImode
);
5630 unsigned char insna
[16] = {
5631 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5637 shufc
= gen_reg_rtx (TImode
);
5638 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5640 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5641 fits 18 bits and the last 4 are zeros. This will be true if
5642 the stack pointer is initialized to 0x3fff0 at program start,
5643 otherwise the ila instruction will be garbage. */
5645 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5646 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5648 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5649 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5650 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5652 mem
= memory_address (Pmode
, tramp
);
5653 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
5656 emit_insn (gen_sync ());
/* NOTE(review): extraction-garbled span -- the lines that fill arr[]
   with base shuffle-byte values (original lines 5672-5676, 5678, 5684)
   and the switch case labels are missing.  Kept byte-identical;
   restore from upstream spu.c.
   Purpose: expand sign extension of ops[1] into the wider ops[0] by
   materializing a sign word (extend or arithmetic shift right by 31)
   and a shufb pattern that replicates sign bytes above the value
   bytes; `last' is the index of the least significant byte of the
   destination (7 for DImode, 15 for TImode).  */
5660 spu_expand_sign_extend (rtx ops
[])
5662 unsigned char arr
[16];
5663 rtx pat
= gen_reg_rtx (TImode
);
5666 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5667 if (GET_MODE (ops
[1]) == QImode
)
5669 sign
= gen_reg_rtx (HImode
);
5670 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5671 for (i
= 0; i
< 16; i
++)
5677 for (i
= 0; i
< 16; i
++)
5679 switch (GET_MODE (ops
[1]))
5682 sign
= gen_reg_rtx (SImode
);
5683 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5685 arr
[last
- 1] = 0x02;
5688 sign
= gen_reg_rtx (SImode
);
5689 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5690 for (i
= 0; i
< 4; i
++)
5691 arr
[last
- i
] = 3 - i
;
5694 sign
= gen_reg_rtx (SImode
);
5695 c
= gen_reg_rtx (SImode
);
5696 emit_insn (gen_spu_convert (c
, ops
[1]));
5697 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5698 for (i
= 0; i
< 8; i
++)
5699 arr
[last
- i
] = 7 - i
;
5705 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5706 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
/* NOTE(review): extraction-garbled span -- braces, several counters
   (n_var bookkeeping) and some statements are missing.  Kept
   byte-identical; restore from upstream spu.c.
   Purpose (per its own header comment and the visible code): expand
   initialization of vector TARGET from the element list VALS.  If all
   elements are equal, use spu_splats.  Otherwise emit the constant
   elements first -- as a direct CONST_VECTOR move when possible, else
   by recursing on a copy of VALS whose variable slots are filled with
   the first constant -- and then insert each non-constant element with
   spu_builtin_insert.  */
5709 /* expand vector initialization. If there are any constant parts,
5710 load constant parts first. Then load any non-constant parts. */
5712 spu_expand_vector_init (rtx target
, rtx vals
)
5714 enum machine_mode mode
= GET_MODE (target
);
5715 int n_elts
= GET_MODE_NUNITS (mode
);
5717 bool all_same
= true;
5718 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5721 first
= XVECEXP (vals
, 0, 0);
5722 for (i
= 0; i
< n_elts
; ++i
)
5724 x
= XVECEXP (vals
, 0, i
);
5725 if (!(CONST_INT_P (x
)
5726 || GET_CODE (x
) == CONST_DOUBLE
5727 || GET_CODE (x
) == CONST_FIXED
))
5731 if (first_constant
== NULL_RTX
)
5734 if (i
> 0 && !rtx_equal_p (x
, first
))
5738 /* if all elements are the same, use splats to repeat elements */
5741 if (!CONSTANT_P (first
)
5742 && !register_operand (first
, GET_MODE (x
)))
5743 first
= force_reg (GET_MODE (first
), first
);
5744 emit_insn (gen_spu_splats (target
, first
));
5748 /* load constant parts */
5749 if (n_var
!= n_elts
)
5753 emit_move_insn (target
,
5754 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
5758 rtx constant_parts_rtx
= copy_rtx (vals
);
5760 gcc_assert (first_constant
!= NULL_RTX
);
5761 /* fill empty slots with the first constant, this increases
5762 our chance of using splats in the recursive call below. */
5763 for (i
= 0; i
< n_elts
; ++i
)
5765 x
= XVECEXP (constant_parts_rtx
, 0, i
);
5766 if (!(CONST_INT_P (x
)
5767 || GET_CODE (x
) == CONST_DOUBLE
5768 || GET_CODE (x
) == CONST_FIXED
))
5769 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
5772 spu_expand_vector_init (target
, constant_parts_rtx
);
5776 /* load variable parts */
5779 rtx insert_operands
[4];
5781 insert_operands
[0] = target
;
5782 insert_operands
[2] = target
;
5783 for (i
= 0; i
< n_elts
; ++i
)
5785 x
= XVECEXP (vals
, 0, i
);
5786 if (!(CONST_INT_P (x
)
5787 || GET_CODE (x
) == CONST_DOUBLE
5788 || GET_CODE (x
) == CONST_FIXED
))
5790 if (!register_operand (x
, GET_MODE (x
)))
5791 x
= force_reg (GET_MODE (x
), x
);
5792 insert_operands
[1] = x
;
5793 insert_operands
[3] = GEN_INT (i
);
5794 spu_builtin_insert (insert_operands
);
/* NOTE(review): extraction-garbled span -- the switch skeleton over
   CODE (the EQ/GT/GTU case labels, return -1 fallthrough, braces) is
   missing; only the mode tests and CODE_FOR_ returns survive.  Kept
   byte-identical; restore from upstream spu.c.
   Purpose (per its own header comment): map a vector comparison
   (rtx_code + destination/operand modes) to the matching ceq/cgt/clgt
   insn code, or -1 when no direct pattern exists.  */
5800 /* Return insn index for the vector compare instruction for given CODE,
5801 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
5804 get_vec_cmp_insn (enum rtx_code code
,
5805 enum machine_mode dest_mode
,
5806 enum machine_mode op_mode
)
5812 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5813 return CODE_FOR_ceq_v16qi
;
5814 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5815 return CODE_FOR_ceq_v8hi
;
5816 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5817 return CODE_FOR_ceq_v4si
;
5818 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5819 return CODE_FOR_ceq_v4sf
;
5820 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5821 return CODE_FOR_ceq_v2df
;
5824 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5825 return CODE_FOR_cgt_v16qi
;
5826 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5827 return CODE_FOR_cgt_v8hi
;
5828 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5829 return CODE_FOR_cgt_v4si
;
5830 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5831 return CODE_FOR_cgt_v4sf
;
5832 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5833 return CODE_FOR_cgt_v2df
;
5836 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5837 return CODE_FOR_clgt_v16qi
;
5838 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5839 return CODE_FOR_clgt_v8hi
;
5840 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5841 return CODE_FOR_clgt_v4si
;
/* NOTE(review): extraction-garbled span -- braces, several case labels
   of the rcode switch (LT/LTU/NE/GE/GEU/LE/LEU), the operand-swap
   code, and the tail returning `mask' are missing.  Kept
   byte-identical; restore from upstream spu.c.
   Purpose (per its own header comment): recursively emit a vector
   compare of OP0/OP1 for RCODE.  Direct patterns come from
   get_vec_cmp_insn; LT/LTU are handled by swapping operands, NE as
   ~(A==B) via one_cmpl_optab, and GE/GEU/LE/LEU as (GT|EQ) etc. via
   ior_optab.  Float destination modes are computed in the integer
   mode of the same width and converted at the end.  */
5849 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5850 DMODE is expected destination mode. This is a recursive function. */
5853 spu_emit_vector_compare (enum rtx_code rcode
,
5855 enum machine_mode dmode
)
5859 enum machine_mode dest_mode
;
5860 enum machine_mode op_mode
= GET_MODE (op1
);
5862 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
5864 /* Floating point vector compare instructions uses destination V4SImode.
5865 Double floating point vector compare instructions uses destination V2DImode.
5866 Move destination to appropriate mode later. */
5867 if (dmode
== V4SFmode
)
5868 dest_mode
= V4SImode
;
5869 else if (dmode
== V2DFmode
)
5870 dest_mode
= V2DImode
;
5874 mask
= gen_reg_rtx (dest_mode
);
5875 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5877 if (vec_cmp_insn
== -1)
5879 bool swap_operands
= false;
5880 bool try_again
= false;
5885 swap_operands
= true;
5890 swap_operands
= true;
5894 /* Treat A != B as ~(A==B). */
5896 enum insn_code nor_code
;
5897 rtx eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5898 nor_code
= optab_handler (one_cmpl_optab
, (int)dest_mode
)->insn_code
;
5899 gcc_assert (nor_code
!= CODE_FOR_nothing
);
5900 emit_insn (GEN_FCN (nor_code
) (mask
, eq_rtx
));
5901 if (dmode
!= dest_mode
)
5903 rtx temp
= gen_reg_rtx (dest_mode
);
5904 convert_move (temp
, mask
, 0);
5914 /* Try GT/GTU/LT/LTU OR EQ */
5917 enum insn_code ior_code
;
5918 enum rtx_code new_code
;
5922 case GE
: new_code
= GT
; break;
5923 case GEU
: new_code
= GTU
; break;
5924 case LE
: new_code
= LT
; break;
5925 case LEU
: new_code
= LTU
; break;
5930 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
5931 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5933 ior_code
= optab_handler (ior_optab
, (int)dest_mode
)->insn_code
;
5934 gcc_assert (ior_code
!= CODE_FOR_nothing
);
5935 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
5936 if (dmode
!= dest_mode
)
5938 rtx temp
= gen_reg_rtx (dest_mode
);
5939 convert_move (temp
, mask
, 0);
5949 /* You only get two chances. */
5951 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5953 gcc_assert (vec_cmp_insn
!= -1);
5964 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
5965 if (dmode
!= dest_mode
)
5967 rtx temp
= gen_reg_rtx (dest_mode
);
5968 convert_move (temp
, mask
, 0);
5975 /* Emit vector conditional expression.
5976 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5977 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5980 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
5981 rtx cond
, rtx cc_op0
, rtx cc_op1
)
5983 enum machine_mode dest_mode
= GET_MODE (dest
);
5984 enum rtx_code rcode
= GET_CODE (cond
);
5987 /* Get the vector mask for the given relational operations. */
5988 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
5990 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
/* NOTE(review): extraction-garbled span -- the else branches and the
   final return are missing.  Kept byte-identical; restore from
   upstream spu.c.
   Purpose: coerce OP into a register of MODE.  VOIDmode/BLKmode
   integer constants go through convert_to_mode; otherwise OP is forced
   to a register and either subreg'd (same size) or run through
   spu_convert into a fresh MODE register.  */
5996 spu_force_reg (enum machine_mode mode
, rtx op
)
5999 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
6001 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
6002 || GET_MODE (op
) == BLKmode
)
6003 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
6007 r
= force_reg (GET_MODE (op
), op
);
6008 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6010 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6015 x
= gen_reg_rtx (mode
);
6016 emit_insn (gen_spu_convert (x
, r
));
/* NOTE(review): extraction-garbled span -- the switch selecting
   `lsbits' per range kind, some error() arguments, and braces are
   missing.  Kept byte-identical; restore from upstream spu.c.
   Purpose: diagnose out-of-range immediate operands of builtins.  For
   range codes SPU_BTI_7..SPU_BTI_U18 the constant value (possibly the
   addend of a CONST PLUS/MINUS, or element 0 of a CONST_VECTOR) is
   checked against spu_builtin_range[]; address-like operands also get
   a warning when low bits that the insn ignores are set.  */
6021 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6023 HOST_WIDE_INT v
= 0;
6025 /* Check the range of immediate operands. */
6026 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6028 int range
= p
- SPU_BTI_7
;
6030 if (!CONSTANT_P (op
))
6031 error ("%s expects an integer literal in the range [%d, %d].",
6033 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6035 if (GET_CODE (op
) == CONST
6036 && (GET_CODE (XEXP (op
, 0)) == PLUS
6037 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6039 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6040 op
= XEXP (XEXP (op
, 0), 0);
6042 else if (GET_CODE (op
) == CONST_INT
)
6044 else if (GET_CODE (op
) == CONST_VECTOR
6045 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6046 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6048 /* The default for v is 0 which is valid in every range. */
6049 if (v
< spu_builtin_range
[range
].low
6050 || v
> spu_builtin_range
[range
].high
)
6051 error ("%s expects an integer literal in the range [%d, %d]. ("
6052 HOST_WIDE_INT_PRINT_DEC
")",
6054 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6063 /* This is only used in lqa, and stqa. Even though the insns
6064 encode 16 bits of the address (all but the 2 least
6065 significant), only 14 bits are used because it is masked to
6066 be 16 byte aligned. */
6070 /* This is used for lqr and stqr. */
6077 if (GET_CODE (op
) == LABEL_REF
6078 || (GET_CODE (op
) == SYMBOL_REF
6079 && SYMBOL_REF_FUNCTION_P (op
))
6080 || (v
& ((1 << lsbits
) - 1)) != 0)
6081 warning (0, "%d least significant bits of %s are ignored.", lsbits
,
/* NOTE(review): extraction-garbled span -- the function header line,
   initialization of `i' for the void/non-void cases, and the final
   return are missing.  Kept byte-identical; restore from upstream
   spu.c.
   Purpose: expand each CALL_EXPR argument of EXP into rtl in ops[],
   offset by one slot when the builtin returns a value (ops[0] is the
   target); returns the count of real (non-SCRATCH) operands.  */
6088 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6089 rtx target
, rtx ops
[])
6091 enum insn_code icode
= (enum insn_code
) d
->icode
;
6094 /* Expand the arguments into rtl. */
6096 if (d
->parm
[0] != SPU_BTI_VOID
)
6099 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6101 tree arg
= CALL_EXPR_ARG (exp
, a
);
6104 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6107 /* The insn pattern may have additional operands (SCRATCH).
6108 Return the number of actual non-SCRATCH operands. */
6109 gcc_assert (i
<= insn_data
[icode
].n_operands
);
/* NOTE(review): extraction-garbled span -- braces, the switch on
   n_operands, several declarations and intermediate statements are
   missing throughout.  Kept byte-identical; restore from upstream
   spu.c.
   Purpose: the workhorse builtin expander.  Expands the arguments
   (expand_builtin_args), sets up the target operand in the pattern's
   preferred mode, special-cases SPU_MASK_FOR_LOAD (builds a negated
   address and loads the mask through the insn's own memory operand),
   splats scalar immediates into vector operands where the pattern
   wants a vector, validates immediates via spu_check_builtin_parm,
   forces operands through predicates, dispatches GEN_FCN with 0..6
   operands, emits it as a call/jump/plain insn per d->type, and
   finally coerces the return value to the prototype mode.  */
6114 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6115 tree exp
, rtx target
)
6119 enum insn_code icode
= (enum insn_code
) d
->icode
;
6120 enum machine_mode mode
, tmode
;
6125 /* Set up ops[] with values from arglist. */
6126 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6128 /* Handle the target operand which must be operand 0. */
6130 if (d
->parm
[0] != SPU_BTI_VOID
)
6133 /* We prefer the mode specified for the match_operand otherwise
6134 use the mode from the builtin function prototype. */
6135 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6136 if (tmode
== VOIDmode
)
6137 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6139 /* Try to use target because not using it can lead to extra copies
6140 and when we are using all of the registers extra copies leads
6142 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6145 target
= ops
[0] = gen_reg_rtx (tmode
);
6147 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6153 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6155 enum machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6160 arg
= CALL_EXPR_ARG (exp
, 0);
6161 gcc_assert (TREE_CODE (TREE_TYPE (arg
)) == POINTER_TYPE
);
6162 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6163 addr
= memory_address (mode
, op
);
6166 op
= gen_reg_rtx (GET_MODE (addr
));
6167 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6168 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6169 op
= gen_rtx_MEM (mode
, op
);
6171 pat
= GEN_FCN (icode
) (target
, op
);
6178 /* Ignore align_hint, but still expand it's args in case they have
6180 if (icode
== CODE_FOR_spu_align_hint
)
6183 /* Handle the rest of the operands. */
6184 for (p
= 1; i
< n_operands
; i
++, p
++)
6186 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6187 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6189 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6191 /* mode can be VOIDmode here for labels */
6193 /* For specific intrinsics with an immediate operand, e.g.,
6194 si_ai(), we sometimes need to convert the scalar argument to a
6195 vector argument by splatting the scalar. */
6196 if (VECTOR_MODE_P (mode
)
6197 && (GET_CODE (ops
[i
]) == CONST_INT
6198 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6199 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6201 if (GET_CODE (ops
[i
]) == CONST_INT
)
6202 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6205 rtx reg
= gen_reg_rtx (mode
);
6206 enum machine_mode imode
= GET_MODE_INNER (mode
);
6207 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6208 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6209 if (imode
!= GET_MODE (ops
[i
]))
6210 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6211 TYPE_UNSIGNED (spu_builtin_types
6213 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6218 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6220 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6221 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6227 pat
= GEN_FCN (icode
) (0);
6230 pat
= GEN_FCN (icode
) (ops
[0]);
6233 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6236 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6239 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6242 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6245 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6254 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6255 emit_call_insn (pat
);
6256 else if (d
->type
== B_JUMP
)
6258 emit_jump_insn (pat
);
6264 return_type
= spu_builtin_types
[d
->parm
[0]];
6265 if (d
->parm
[0] != SPU_BTI_VOID
6266 && GET_MODE (target
) != TYPE_MODE (return_type
))
6268 /* target is the return value. It should always be the mode of
6269 the builtin function prototype. */
6270 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6277 spu_expand_builtin (tree exp
,
6279 rtx subtarget ATTRIBUTE_UNUSED
,
6280 enum machine_mode mode ATTRIBUTE_UNUSED
,
6281 int ignore ATTRIBUTE_UNUSED
)
6283 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6284 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
) - END_BUILTINS
;
6285 struct spu_builtin_description
*d
;
6287 if (fcode
< NUM_SPU_BUILTINS
)
6289 d
= &spu_builtins
[fcode
];
6291 return spu_expand_builtin_1 (d
, exp
, target
);
6296 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6298 spu_builtin_mul_widen_even (tree type
)
6300 switch (TYPE_MODE (type
))
6303 if (TYPE_UNSIGNED (type
))
6304 return spu_builtins
[SPU_MULE_0
].fndecl
;
6306 return spu_builtins
[SPU_MULE_1
].fndecl
;
6313 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6315 spu_builtin_mul_widen_odd (tree type
)
6317 switch (TYPE_MODE (type
))
6320 if (TYPE_UNSIGNED (type
))
6321 return spu_builtins
[SPU_MULO_1
].fndecl
;
6323 return spu_builtins
[SPU_MULO_0
].fndecl
;
6330 /* Implement targetm.vectorize.builtin_mask_for_load. */
6332 spu_builtin_mask_for_load (void)
6334 struct spu_builtin_description
*d
= &spu_builtins
[SPU_MASK_FOR_LOAD
];
/* NOTE(review): extraction-garbled span -- the entire function body
   (original lines 6348 onward) is missing; only the header, the
   parameter list, and the explanatory comment survive.  Kept
   byte-identical; restore the body from upstream spu.c.  */
6339 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6341 spu_builtin_vectorization_cost (bool runtime_test
)
6343 /* If the branch of the runtime test is taken - i.e. - the vectorized
6344 version is skipped - this incurs a misprediction cost (because the
6345 vectorized version is expected to be the fall-through). So we subtract
6346 the latency of a mispredicted branch from the costs that are incurred
6347 when the vectorized version is executed. */
/* NOTE(review): extraction-garbled span -- the function body is
   missing except for one trailing comment; presumably it returns true
   unless IS_PACKED -- TODO confirm against upstream spu.c.  Kept
   byte-identical.  */
6354 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6355 after applying N number of iterations. This routine does not determine
6356 how may iterations are required to reach desired alignment. */
6359 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6364 /* All other types are naturally aligned. */
/* NOTE(review): extraction-garbled span -- the case labels of the mode
   switch, the default case, and the final `return d->fndecl;' are
   missing.  Kept byte-identical; restore from upstream spu.c.
   Purpose: return the __builtin_shuffle-style builtin matching TYPE's
   mode and signedness (SPU_SHUFFLE_0..9) and report the mask element
   type as unsigned char.  */
6368 /* Implement targetm.vectorize.builtin_vec_perm. */
6370 spu_builtin_vec_perm (tree type
, tree
*mask_element_type
)
6372 struct spu_builtin_description
*d
;
6374 *mask_element_type
= unsigned_char_type_node
;
6376 switch (TYPE_MODE (type
))
6379 if (TYPE_UNSIGNED (type
))
6380 d
= &spu_builtins
[SPU_SHUFFLE_0
];
6382 d
= &spu_builtins
[SPU_SHUFFLE_1
];
6386 if (TYPE_UNSIGNED (type
))
6387 d
= &spu_builtins
[SPU_SHUFFLE_2
];
6389 d
= &spu_builtins
[SPU_SHUFFLE_3
];
6393 if (TYPE_UNSIGNED (type
))
6394 d
= &spu_builtins
[SPU_SHUFFLE_4
];
6396 d
= &spu_builtins
[SPU_SHUFFLE_5
];
6400 if (TYPE_UNSIGNED (type
))
6401 d
= &spu_builtins
[SPU_SHUFFLE_6
];
6403 d
= &spu_builtins
[SPU_SHUFFLE_7
];
6407 d
= &spu_builtins
[SPU_SHUFFLE_8
];
6411 d
= &spu_builtins
[SPU_SHUFFLE_9
];
/* NOTE(review): extraction-garbled span -- the counter-update lines
   inside the loop (original lines 6436-6440, presumably the t[p]++
   bookkeeping and assertions) are missing.  Kept byte-identical;
   restore from upstream spu.c.
   Purpose (per its own header comment): compute the Minimum Iteration
   Interval for the modulo scheduler from per-pipe instruction counts,
   with optional dump-file tracing.  */
6422 /* Count the total number of instructions in each pipe and return the
6423 maximum, which is used as the Minimum Iteration Interval (MII)
6424 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6425 -2 are instructions that can go in pipe0 or pipe1. */
6427 spu_sms_res_mii (struct ddg
*g
)
6430 unsigned t
[4] = {0, 0, 0, 0};
6432 for (i
= 0; i
< g
->num_nodes
; i
++)
6434 rtx insn
= g
->nodes
[i
].insn
;
6435 int p
= get_pipe (insn
) + 2;
6441 if (dump_file
&& INSN_P (insn
))
6442 fprintf (dump_file
, "i%d %s %d %d\n",
6444 insn_data
[INSN_CODE(insn
)].name
,
6448 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
6450 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
/* NOTE(review): extraction-garbled span -- the function header's
   return type, braces, and possibly a guard around the
   REGNO_POINTER_ALIGN assignment (original lines 6456-6459) are
   missing.  Kept byte-identical; restore from upstream spu.c.
   Purpose: per-function expander setup -- conservatively mark the hard
   frame pointer as only 64-bit (8) aligned until the prologue decides,
   and pre-create two 128-bit-aligned pseudo registers so the first two
   pseudos after the virtual registers are treated as aligned.  */
6455 spu_init_expanders (void)
6460 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6461 frame_pointer_needed is true. We don't know that until we're
6462 expanding the prologue. */
6463 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6465 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6466 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6467 to be treated as aligned, so generate them here. */
6468 r0
= gen_reg_rtx (SImode
);
6469 r1
= gen_reg_rtx (SImode
);
6470 mark_reg_pointer (r0
, 128);
6471 mark_reg_pointer (r1
, 128);
6472 gcc_assert (REGNO (r0
) == LAST_VIRTUAL_REGISTER
+ 1
6473 && REGNO (r1
) == LAST_VIRTUAL_REGISTER
+ 2);
6477 static enum machine_mode
6478 spu_libgcc_cmp_return_mode (void)
6481 /* For SPU word mode is TI mode so it is better to use SImode
6482 for compare returns. */
6486 static enum machine_mode
6487 spu_libgcc_shift_count_mode (void)
6489 /* For SPU word mode is TI mode so it is better to use SImode
6490 for shift counts. */
6494 /* An early place to adjust some flags after GCC has finished processing
6497 asm_file_start (void)
6499 /* Variable tracking should be run after all optimizations which
6500 change order of insns. It also needs a valid CFG. */
6501 spu_flag_var_tracking
= flag_var_tracking
;
6502 flag_var_tracking
= 0;
6504 default_file_start ();
6507 /* Implement targetm.section_type_flags. */
6509 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6511 /* .toe needs to have type @nobits. */
6512 if (strcmp (name
, ".toe") == 0)
6514 return default_section_type_flags (decl
, name
, reloc
);
6517 /* Generate a constant or register which contains 2^SCALE. We assume
6518 the result is valid for MODE. Currently, MODE must be V4SFmode and
6519 SCALE must be SImode. */
6521 spu_gen_exp2 (enum machine_mode mode
, rtx scale
)
6523 gcc_assert (mode
== V4SFmode
);
6524 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6525 if (GET_CODE (scale
) != CONST_INT
)
6527 /* unsigned int exp = (127 + scale) << 23;
6528 __vector float m = (__vector float) spu_splats (exp); */
6529 rtx reg
= force_reg (SImode
, scale
);
6530 rtx exp
= gen_reg_rtx (SImode
);
6531 rtx mul
= gen_reg_rtx (mode
);
6532 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6533 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
6534 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
6539 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6540 unsigned char arr
[16];
6541 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
6542 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6543 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6544 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
6545 return array_to_constant (mode
, arr
);
6549 /* After reload, just change the convert into a move instruction
6550 or a dead instruction. */
6552 spu_split_convert (rtx ops
[])
6554 if (REGNO (ops
[0]) == REGNO (ops
[1]))
6555 emit_note (NOTE_INSN_DELETED
);
6558 /* Use TImode always as this might help hard reg copyprop. */
6559 rtx op0
= gen_rtx_REG (TImode
, REGNO (ops
[0]));
6560 rtx op1
= gen_rtx_REG (TImode
, REGNO (ops
[1]));
6561 emit_insn (gen_move_insn (op0
, op1
));