1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
53 #include "tm-constrs.h"
59 /* Builtin types, data and prototypes. */
61 enum spu_builtin_type_index
63 SPU_BTI_END_OF_PARAMS
,
65 /* We create new type nodes for these. */
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
80 /* These all correspond to intSI_type_node */
94 /* These correspond to the standard types */
114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
/* Type nodes used by the SPU builtins, indexed by the
   spu_builtin_type_index enumerators (the *_type_node macros above
   are shorthands into this array).  GTY(()) registers the array as a
   garbage-collector root so the tree nodes stay live.  */
125 static GTY(()) tree spu_builtin_types
[SPU_BTI_MAX
];
127 struct spu_builtin_range
132 static struct spu_builtin_range spu_builtin_range
[] = {
133 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
134 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
136 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
137 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
139 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
140 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
141 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
148 /* Target specific attribute specifications. */
/* One byte per hard register.  NOTE(review): presumably records which
   registers have ever been allocated by this backend -- the comment
   describing it was lost in extraction; confirm against the uses of
   this array elsewhere in the file.  */
149 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
151 /* Prototypes and external defs. */
152 static void spu_init_builtins (void);
153 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode
);
154 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode
);
155 static bool spu_legitimate_address_p (enum machine_mode
, rtx
, bool);
156 static rtx
adjust_operand (rtx op
, HOST_WIDE_INT
* start
);
157 static rtx
get_pic_reg (void);
158 static int need_to_save_reg (int regno
, int saving
);
159 static rtx
frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
);
160 static rtx
frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
);
161 static rtx
frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
,
163 static void emit_nop_for_insn (rtx insn
);
164 static bool insn_clobbers_hbr (rtx insn
);
165 static void spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
166 int distance
, sbitmap blocks
);
167 static rtx
spu_emit_vector_compare (enum rtx_code rcode
, rtx op0
, rtx op1
,
168 enum machine_mode dmode
);
169 static rtx
get_branch_target (rtx branch
);
170 static void spu_machine_dependent_reorg (void);
171 static int spu_sched_issue_rate (void);
172 static int spu_sched_variable_issue (FILE * dump
, int verbose
, rtx insn
,
174 static int get_pipe (rtx insn
);
175 static int spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
);
176 static void spu_sched_init_global (FILE *, int, int);
177 static void spu_sched_init (FILE *, int, int);
178 static int spu_sched_reorder (FILE *, int, rtx
*, int *, int);
179 static tree
spu_handle_fndecl_attribute (tree
* node
, tree name
, tree args
,
181 unsigned char *no_add_attrs
);
182 static tree
spu_handle_vector_attribute (tree
* node
, tree name
, tree args
,
184 unsigned char *no_add_attrs
);
185 static int spu_naked_function_p (tree func
);
186 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
187 const_tree type
, unsigned char named
);
188 static tree
spu_build_builtin_va_list (void);
189 static void spu_va_start (tree
, rtx
);
190 static tree
spu_gimplify_va_arg_expr (tree valist
, tree type
,
191 gimple_seq
* pre_p
, gimple_seq
* post_p
);
192 static int store_with_one_insn_p (rtx mem
);
193 static int mem_is_padded_component_ref (rtx x
);
194 static int reg_aligned_for_addr (rtx x
);
195 static bool spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
);
196 static void spu_asm_globalize_label (FILE * file
, const char *name
);
197 static unsigned char spu_rtx_costs (rtx x
, int code
, int outer_code
,
198 int *total
, bool speed
);
199 static unsigned char spu_function_ok_for_sibcall (tree decl
, tree exp
);
200 static void spu_init_libfuncs (void);
201 static bool spu_return_in_memory (const_tree type
, const_tree fntype
);
202 static void fix_range (const char *);
203 static void spu_encode_section_info (tree
, rtx
, int);
204 static rtx
spu_legitimize_address (rtx
, rtx
, enum machine_mode
);
205 static tree
spu_builtin_mul_widen_even (tree
);
206 static tree
spu_builtin_mul_widen_odd (tree
);
207 static tree
spu_builtin_mask_for_load (void);
208 static int spu_builtin_vectorization_cost (bool);
209 static bool spu_vector_alignment_reachable (const_tree
, bool);
210 static tree
spu_builtin_vec_perm (tree
, tree
*);
211 static int spu_sms_res_mii (struct ddg
*g
);
212 static void asm_file_start (void);
213 static unsigned int spu_section_type_flags (tree
, const char *, int);
214 static rtx
spu_expand_load (rtx
, rtx
, rtx
, int);
216 extern const char *reg_names
[];
218 /* Which instruction set architecture to use. */
220 /* Which cpu are we tuning for. */
223 /* The hardware requires 8 insns between a hint and the branch it
224 affects. This variable describes how many rtl instructions the
225 compiler needs to see before inserting a hint, and then the compiler
226 will insert enough nops to make it at least 8 insns. The default is
227 for the compiler to allow up to 2 nops to be emitted. The nops are
228 inserted in pairs, so we round down. */
229 int spu_hint_dist
= (8*4) - (2*4);
/* I.e. 8 insns * 4 bytes/insn, minus the 2 default nops * 4 bytes
   allowed by the comment above.  */
231 /* Determines whether we run variable tracking in machine dependent
   reorg.  (The tail of this comment was lost in extraction; the
   closing delimiter is restored here so the declaration below is not
   swallowed by the comment.)  */
233 static int spu_flag_var_tracking
;
248 IC_POOL
, /* constant pool */
249 IC_IL1
, /* one il* instruction */
250 IC_IL2
, /* both ilhu and iohl instructions */
251 IC_IL1s
, /* one il* instruction */
252 IC_IL2s
, /* both ilhu and iohl instructions */
253 IC_FSMBI
, /* the fsmbi instruction */
254 IC_CPAT
, /* one of the c*d instructions */
255 IC_FSMBI2
/* fsmbi plus 1 other instruction */
258 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
259 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
260 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
261 static enum immediate_class
classify_immediate (rtx op
,
262 enum machine_mode mode
);
264 static enum machine_mode
spu_unwind_word_mode (void);
266 static enum machine_mode
267 spu_libgcc_cmp_return_mode (void);
269 static enum machine_mode
270 spu_libgcc_shift_count_mode (void);
273 /* TARGET overrides. */
275 #undef TARGET_INIT_BUILTINS
276 #define TARGET_INIT_BUILTINS spu_init_builtins
278 #undef TARGET_EXPAND_BUILTIN
279 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
281 #undef TARGET_UNWIND_WORD_MODE
282 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
284 #undef TARGET_LEGITIMIZE_ADDRESS
285 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
287 /* The .8byte directive doesn't seem to work well for a 32 bit
289 #undef TARGET_ASM_UNALIGNED_DI_OP
290 #define TARGET_ASM_UNALIGNED_DI_OP NULL
292 #undef TARGET_RTX_COSTS
293 #define TARGET_RTX_COSTS spu_rtx_costs
295 #undef TARGET_ADDRESS_COST
296 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
298 #undef TARGET_SCHED_ISSUE_RATE
299 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
301 #undef TARGET_SCHED_INIT_GLOBAL
302 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
304 #undef TARGET_SCHED_INIT
305 #define TARGET_SCHED_INIT spu_sched_init
307 #undef TARGET_SCHED_VARIABLE_ISSUE
308 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
310 #undef TARGET_SCHED_REORDER
311 #define TARGET_SCHED_REORDER spu_sched_reorder
313 #undef TARGET_SCHED_REORDER2
314 #define TARGET_SCHED_REORDER2 spu_sched_reorder
316 #undef TARGET_SCHED_ADJUST_COST
317 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
319 const struct attribute_spec spu_attribute_table
[];
320 #undef TARGET_ATTRIBUTE_TABLE
321 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
323 #undef TARGET_ASM_INTEGER
324 #define TARGET_ASM_INTEGER spu_assemble_integer
326 #undef TARGET_SCALAR_MODE_SUPPORTED_P
327 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
329 #undef TARGET_VECTOR_MODE_SUPPORTED_P
330 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
332 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
333 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
335 #undef TARGET_ASM_GLOBALIZE_LABEL
336 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
338 #undef TARGET_PASS_BY_REFERENCE
339 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
341 #undef TARGET_MUST_PASS_IN_STACK
342 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
344 #undef TARGET_BUILD_BUILTIN_VA_LIST
345 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
347 #undef TARGET_EXPAND_BUILTIN_VA_START
348 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
350 #undef TARGET_SETUP_INCOMING_VARARGS
351 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
353 #undef TARGET_MACHINE_DEPENDENT_REORG
354 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
356 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
357 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
359 #undef TARGET_DEFAULT_TARGET_FLAGS
360 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
362 #undef TARGET_INIT_LIBFUNCS
363 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
365 #undef TARGET_RETURN_IN_MEMORY
366 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
368 #undef TARGET_ENCODE_SECTION_INFO
369 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
371 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
372 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
374 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
375 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
377 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
378 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
380 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
381 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
383 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
384 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
386 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
387 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
389 #undef TARGET_LIBGCC_CMP_RETURN_MODE
390 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
392 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
393 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
395 #undef TARGET_SCHED_SMS_RES_MII
396 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
398 #undef TARGET_ASM_FILE_START
399 #define TARGET_ASM_FILE_START asm_file_start
401 #undef TARGET_SECTION_TYPE_FLAGS
402 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
404 #undef TARGET_LEGITIMATE_ADDRESS_P
405 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
/* Initialize the GCC target structure.  TARGET_INITIALIZER expands to
   an aggregate initializer whose fields pick up the TARGET_* macro
   overrides #define'd above; fields not overridden get the default
   hooks.  */
407 struct gcc_target targetm
= TARGET_INITIALIZER
;
410 spu_optimization_options (int level ATTRIBUTE_UNUSED
, int size ATTRIBUTE_UNUSED
)
412 /* Override some of the default param values. With so many registers
413 larger values are better for these params. */
414 MAX_PENDING_LIST_LENGTH
= 128;
416 /* With so many registers this is better on by default. */
417 flag_rename_registers
= 1;
420 /* Sometimes certain combinations of command options do not make sense
421 on a particular target machine. You can define a macro
422 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
423 executed once just after all the command options have been parsed. */
425 spu_override_options (void)
427 /* Small loops will be unpeeled at -O3. For SPU it is more important
428 to keep code small by default. */
429 if (!flag_unroll_loops
&& !flag_peel_loops
430 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
431 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
) = 1;
433 flag_omit_frame_pointer
= 1;
435 /* Functions must be 8 byte aligned so we correctly handle dual issue */
436 if (align_functions
< 8)
439 spu_hint_dist
= 8*4 - spu_max_nops
*4;
440 if (spu_hint_dist
< 0)
443 if (spu_fixed_range_string
)
444 fix_range (spu_fixed_range_string
);
446 /* Determine processor architectural level. */
449 if (strcmp (&spu_arch_string
[0], "cell") == 0)
450 spu_arch
= PROCESSOR_CELL
;
451 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
452 spu_arch
= PROCESSOR_CELLEDP
;
454 error ("Unknown architecture '%s'", &spu_arch_string
[0]);
457 /* Determine processor to tune for. */
460 if (strcmp (&spu_tune_string
[0], "cell") == 0)
461 spu_tune
= PROCESSOR_CELL
;
462 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
463 spu_tune
= PROCESSOR_CELLEDP
;
465 error ("Unknown architecture '%s'", &spu_tune_string
[0]);
468 /* Change defaults according to the processor architecture. */
469 if (spu_arch
== PROCESSOR_CELLEDP
)
471 /* If no command line option has been otherwise specified, change
472 the default to -mno-safe-hints on celledp -- only the original
473 Cell/B.E. processors require this workaround. */
474 if (!(target_flags_explicit
& MASK_SAFE_HINTS
))
475 target_flags
&= ~MASK_SAFE_HINTS
;
478 REAL_MODE_FORMAT (SFmode
) = &spu_single_format
;
481 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
482 struct attribute_spec.handler. */
484 /* Table of machine attributes. */
485 const struct attribute_spec spu_attribute_table
[] =
487 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
488 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
},
489 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
},
490 { NULL
, 0, 0, false, false, false, NULL
}
493 /* True if MODE is valid for the target. By "valid", we mean able to
494 be manipulated in non-trivial ways. In particular, this means all
495 the arithmetic is supported. */
497 spu_scalar_mode_supported_p (enum machine_mode mode
)
515 /* Similarly for vector modes. "Supported" here is less strict. At
516 least some operations are supported; need to check optabs or builtins
517 for further details. */
519 spu_vector_mode_supported_p (enum machine_mode mode
)
536 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
537 least significant bytes of the outer mode. This function returns
538 TRUE for the SUBREG's where this is correct. */
540 valid_subreg (rtx op
)
542 enum machine_mode om
= GET_MODE (op
);
543 enum machine_mode im
= GET_MODE (SUBREG_REG (op
));
544 return om
!= VOIDmode
&& im
!= VOIDmode
545 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
546 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4)
547 || (GET_MODE_SIZE (im
) >= 16 && GET_MODE_SIZE (om
) >= 16));
550 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
551 and adjust the start offset. */
553 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
555 enum machine_mode mode
;
557 /* Strip any paradoxical SUBREG. */
558 if (GET_CODE (op
) == SUBREG
559 && (GET_MODE_BITSIZE (GET_MODE (op
))
560 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)))))
564 GET_MODE_BITSIZE (GET_MODE (op
)) -
565 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
566 op
= SUBREG_REG (op
);
568 /* If it is smaller than SI, assure a SUBREG */
569 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
573 *start
+= 32 - op_size
;
576 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
577 mode
= mode_for_size (op_size
, MODE_INT
, 0);
578 if (mode
!= GET_MODE (op
))
579 op
= gen_rtx_SUBREG (mode
, op
, 0);
584 spu_expand_extv (rtx ops
[], int unsignedp
)
586 rtx dst
= ops
[0], src
= ops
[1];
587 HOST_WIDE_INT width
= INTVAL (ops
[2]);
588 HOST_WIDE_INT start
= INTVAL (ops
[3]);
589 HOST_WIDE_INT align_mask
;
590 rtx s0
, s1
, mask
, r0
;
592 gcc_assert (REG_P (dst
) && GET_MODE (dst
) == TImode
);
596 /* First, determine if we need 1 TImode load or 2. We need only 1
597 if the bits being extracted do not cross the alignment boundary
598 as determined by the MEM and its address. */
600 align_mask
= -MEM_ALIGN (src
);
601 if ((start
& align_mask
) == ((start
+ width
- 1) & align_mask
))
603 /* Alignment is sufficient for 1 load. */
604 s0
= gen_reg_rtx (TImode
);
605 r0
= spu_expand_load (s0
, 0, src
, start
/ 8);
608 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
613 s0
= gen_reg_rtx (TImode
);
614 s1
= gen_reg_rtx (TImode
);
615 r0
= spu_expand_load (s0
, s1
, src
, start
/ 8);
618 gcc_assert (start
+ width
<= 128);
621 rtx r1
= gen_reg_rtx (SImode
);
622 mask
= gen_reg_rtx (TImode
);
623 emit_move_insn (mask
, GEN_INT (-1));
624 emit_insn (gen_rotqby_ti (s0
, s0
, r0
));
625 emit_insn (gen_rotqby_ti (s1
, s1
, r0
));
626 if (GET_CODE (r0
) == CONST_INT
)
627 r1
= GEN_INT (INTVAL (r0
) & 15);
629 emit_insn (gen_andsi3 (r1
, r0
, GEN_INT (15)));
630 emit_insn (gen_shlqby_ti (mask
, mask
, r1
));
631 emit_insn (gen_selb (s0
, s1
, s0
, mask
));
636 else if (GET_CODE (src
) == SUBREG
)
638 rtx r
= SUBREG_REG (src
);
639 gcc_assert (REG_P (r
) && SCALAR_INT_MODE_P (GET_MODE (r
)));
640 s0
= gen_reg_rtx (TImode
);
641 if (GET_MODE_SIZE (GET_MODE (r
)) < GET_MODE_SIZE (TImode
))
642 emit_insn (gen_rtx_SET (VOIDmode
, s0
, gen_rtx_ZERO_EXTEND (TImode
, r
)));
644 emit_move_insn (s0
, src
);
648 gcc_assert (REG_P (src
) && GET_MODE (src
) == TImode
);
649 s0
= gen_reg_rtx (TImode
);
650 emit_move_insn (s0
, src
);
653 /* Now s0 is TImode and contains the bits to extract at start. */
656 emit_insn (gen_rotlti3 (s0
, s0
, GEN_INT (start
)));
660 tree c
= build_int_cst (NULL_TREE
, 128 - width
);
661 s0
= expand_shift (RSHIFT_EXPR
, TImode
, s0
, c
, s0
, unsignedp
);
664 emit_move_insn (dst
, s0
);
668 spu_expand_insv (rtx ops
[])
670 HOST_WIDE_INT width
= INTVAL (ops
[1]);
671 HOST_WIDE_INT start
= INTVAL (ops
[2]);
672 HOST_WIDE_INT maskbits
;
673 enum machine_mode dst_mode
, src_mode
;
674 rtx dst
= ops
[0], src
= ops
[3];
675 int dst_size
, src_size
;
681 if (GET_CODE (ops
[0]) == MEM
)
682 dst
= gen_reg_rtx (TImode
);
684 dst
= adjust_operand (dst
, &start
);
685 dst_mode
= GET_MODE (dst
);
686 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
688 if (CONSTANT_P (src
))
690 enum machine_mode m
=
691 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
692 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
694 src
= adjust_operand (src
, 0);
695 src_mode
= GET_MODE (src
);
696 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
698 mask
= gen_reg_rtx (dst_mode
);
699 shift_reg
= gen_reg_rtx (dst_mode
);
700 shift
= dst_size
- start
- width
;
702 /* It's not safe to use subreg here because the compiler assumes
703 that the SUBREG_REG is right justified in the SUBREG. */
704 convert_move (shift_reg
, src
, 1);
711 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
714 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
717 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
729 maskbits
= (-1ll << (32 - width
- start
));
731 maskbits
+= (1ll << (32 - start
));
732 emit_move_insn (mask
, GEN_INT (maskbits
));
735 maskbits
= (-1ll << (64 - width
- start
));
737 maskbits
+= (1ll << (64 - start
));
738 emit_move_insn (mask
, GEN_INT (maskbits
));
742 unsigned char arr
[16];
744 memset (arr
, 0, sizeof (arr
));
745 arr
[i
] = 0xff >> (start
& 7);
746 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
748 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
749 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
755 if (GET_CODE (ops
[0]) == MEM
)
757 rtx low
= gen_reg_rtx (SImode
);
758 rtx rotl
= gen_reg_rtx (SImode
);
759 rtx mask0
= gen_reg_rtx (TImode
);
765 addr
= force_reg (Pmode
, XEXP (ops
[0], 0));
766 addr0
= gen_rtx_AND (Pmode
, addr
, GEN_INT (-16));
767 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
768 emit_insn (gen_negsi2 (rotl
, low
));
769 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
770 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
771 mem
= change_address (ops
[0], TImode
, addr0
);
772 set_mem_alias_set (mem
, 0);
773 emit_move_insn (dst
, mem
);
774 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
775 if (start
+ width
> MEM_ALIGN (ops
[0]))
777 rtx shl
= gen_reg_rtx (SImode
);
778 rtx mask1
= gen_reg_rtx (TImode
);
779 rtx dst1
= gen_reg_rtx (TImode
);
781 addr1
= plus_constant (addr
, 16);
782 addr1
= gen_rtx_AND (Pmode
, addr1
, GEN_INT (-16));
783 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
784 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
785 mem1
= change_address (ops
[0], TImode
, addr1
);
786 set_mem_alias_set (mem1
, 0);
787 emit_move_insn (dst1
, mem1
);
788 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
789 emit_move_insn (mem1
, dst1
);
791 emit_move_insn (mem
, dst
);
794 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
799 spu_expand_block_move (rtx ops
[])
801 HOST_WIDE_INT bytes
, align
, offset
;
802 rtx src
, dst
, sreg
, dreg
, target
;
804 if (GET_CODE (ops
[2]) != CONST_INT
805 || GET_CODE (ops
[3]) != CONST_INT
806 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
809 bytes
= INTVAL (ops
[2]);
810 align
= INTVAL (ops
[3]);
820 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
822 dst
= adjust_address (ops
[0], V16QImode
, offset
);
823 src
= adjust_address (ops
[1], V16QImode
, offset
);
824 emit_move_insn (dst
, src
);
829 unsigned char arr
[16] = { 0 };
830 for (i
= 0; i
< bytes
- offset
; i
++)
832 dst
= adjust_address (ops
[0], V16QImode
, offset
);
833 src
= adjust_address (ops
[1], V16QImode
, offset
);
834 mask
= gen_reg_rtx (V16QImode
);
835 sreg
= gen_reg_rtx (V16QImode
);
836 dreg
= gen_reg_rtx (V16QImode
);
837 target
= gen_reg_rtx (V16QImode
);
838 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
839 emit_move_insn (dreg
, dst
);
840 emit_move_insn (sreg
, src
);
841 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
842 emit_move_insn (dst
, target
);
850 { SPU_EQ
, SPU_GT
, SPU_GTU
};
852 int spu_comp_icode
[12][3] = {
853 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
854 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
855 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
856 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
857 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
858 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
859 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
860 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
861 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
862 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
863 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
864 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
867 /* Generate a compare for CODE. Return a brand-new rtx that represents
868 the result of the compare. GCC can figure this out too if we don't
869 provide all variations of compares, but GCC always wants to use
870 WORD_MODE, we can generate better code in most cases if we do it
873 spu_emit_branch_or_set (int is_set
, rtx cmp
, rtx operands
[])
875 int reverse_compare
= 0;
876 int reverse_test
= 0;
877 rtx compare_result
, eq_result
;
878 rtx comp_rtx
, eq_rtx
;
879 enum machine_mode comp_mode
;
880 enum machine_mode op_mode
;
881 enum spu_comp_code scode
, eq_code
;
882 enum insn_code ior_code
;
883 enum rtx_code code
= GET_CODE (cmp
);
884 rtx op0
= XEXP (cmp
, 0);
885 rtx op1
= XEXP (cmp
, 1);
889 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
890 and so on, to keep the constant in operand 1. */
891 if (GET_CODE (op1
) == CONST_INT
)
893 HOST_WIDE_INT val
= INTVAL (op1
) - 1;
894 if (trunc_int_for_mode (val
, GET_MODE (op0
)) == val
)
919 op_mode
= GET_MODE (op0
);
925 if (HONOR_NANS (op_mode
))
940 if (HONOR_NANS (op_mode
))
1020 comp_mode
= op_mode
;
1024 comp_mode
= op_mode
;
1028 comp_mode
= op_mode
;
1032 comp_mode
= V4SImode
;
1036 comp_mode
= V2DImode
;
1043 if (GET_MODE (op1
) == DFmode
1044 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
1047 if (is_set
== 0 && op1
== const0_rtx
1048 && (GET_MODE (op0
) == SImode
1049 || GET_MODE (op0
) == HImode
) && scode
== SPU_EQ
)
1051 /* Don't need to set a register with the result when we are
1052 comparing against zero and branching. */
1053 reverse_test
= !reverse_test
;
1054 compare_result
= op0
;
1058 compare_result
= gen_reg_rtx (comp_mode
);
1060 if (reverse_compare
)
1067 if (spu_comp_icode
[index
][scode
] == 0)
1070 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
1072 op0
= force_reg (op_mode
, op0
);
1073 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
1075 op1
= force_reg (op_mode
, op1
);
1076 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
1080 emit_insn (comp_rtx
);
1084 eq_result
= gen_reg_rtx (comp_mode
);
1085 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
1090 ior_code
= ior_optab
->handlers
[(int)comp_mode
].insn_code
;
1091 gcc_assert (ior_code
!= CODE_FOR_nothing
);
1092 emit_insn (GEN_FCN (ior_code
)
1093 (compare_result
, compare_result
, eq_result
));
1102 /* We don't have branch on QI compare insns, so we convert the
1103 QI compare result to a HI result. */
1104 if (comp_mode
== QImode
)
1106 rtx old_res
= compare_result
;
1107 compare_result
= gen_reg_rtx (HImode
);
1109 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
1113 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
1115 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
1117 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, operands
[3]);
1118 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
1119 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
1122 else if (is_set
== 2)
1124 rtx target
= operands
[0];
1125 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
1126 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
1127 enum machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
1129 rtx op_t
= operands
[2];
1130 rtx op_f
= operands
[3];
1132 /* The result of the comparison can be SI, HI or QI mode. Create a
1133 mask based on that result. */
1134 if (target_size
> compare_size
)
1136 select_mask
= gen_reg_rtx (mode
);
1137 emit_insn (gen_extend_compare (select_mask
, compare_result
));
1139 else if (target_size
< compare_size
)
1141 gen_rtx_SUBREG (mode
, compare_result
,
1142 (compare_size
- target_size
) / BITS_PER_UNIT
);
1143 else if (comp_mode
!= mode
)
1144 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
1146 select_mask
= compare_result
;
1148 if (GET_MODE (target
) != GET_MODE (op_t
)
1149 || GET_MODE (target
) != GET_MODE (op_f
))
1153 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
1155 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
1159 rtx target
= operands
[0];
1161 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
1162 gen_rtx_NOT (comp_mode
, compare_result
)));
1163 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
1164 emit_insn (gen_extendhisi2 (target
, compare_result
));
1165 else if (GET_MODE (target
) == SImode
1166 && GET_MODE (compare_result
) == QImode
)
1167 emit_insn (gen_extend_compare (target
, compare_result
));
1169 emit_move_insn (target
, compare_result
);
1174 const_double_to_hwint (rtx x
)
1178 if (GET_MODE (x
) == SFmode
)
1180 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1181 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1183 else if (GET_MODE (x
) == DFmode
)
1186 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1187 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
1189 val
= (val
<< 32) | (l
[1] & 0xffffffff);
1197 hwint_to_const_double (enum machine_mode mode
, HOST_WIDE_INT v
)
1201 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1204 tv
[0] = (v
<< 32) >> 32;
1205 else if (mode
== DFmode
)
1207 tv
[1] = (v
<< 32) >> 32;
1210 real_from_target (&rv
, tv
, mode
);
1211 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
1215 print_operand_address (FILE * file
, register rtx addr
)
1220 if (GET_CODE (addr
) == AND
1221 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1222 && INTVAL (XEXP (addr
, 1)) == -16)
1223 addr
= XEXP (addr
, 0);
1225 switch (GET_CODE (addr
))
1228 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1232 reg
= XEXP (addr
, 0);
1233 offset
= XEXP (addr
, 1);
1234 if (GET_CODE (offset
) == REG
)
1236 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1237 reg_names
[REGNO (offset
)]);
1239 else if (GET_CODE (offset
) == CONST_INT
)
1241 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1242 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1252 output_addr_const (file
, addr
);
1262 print_operand (FILE * file
, rtx x
, int code
)
1264 enum machine_mode mode
= GET_MODE (x
);
1266 unsigned char arr
[16];
1267 int xcode
= GET_CODE (x
);
1269 if (GET_MODE (x
) == VOIDmode
)
1272 case 'L': /* 128 bits, signed */
1273 case 'm': /* 128 bits, signed */
1274 case 'T': /* 128 bits, signed */
1275 case 't': /* 128 bits, signed */
1278 case 'K': /* 64 bits, signed */
1279 case 'k': /* 64 bits, signed */
1280 case 'D': /* 64 bits, signed */
1281 case 'd': /* 64 bits, signed */
1284 case 'J': /* 32 bits, signed */
1285 case 'j': /* 32 bits, signed */
1286 case 's': /* 32 bits, signed */
1287 case 'S': /* 32 bits, signed */
1294 case 'j': /* 32 bits, signed */
1295 case 'k': /* 64 bits, signed */
1296 case 'm': /* 128 bits, signed */
1297 if (xcode
== CONST_INT
1298 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1300 gcc_assert (logical_immediate_p (x
, mode
));
1301 constant_to_array (mode
, x
, arr
);
1302 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1303 val
= trunc_int_for_mode (val
, SImode
);
1304 switch (which_logical_immediate (val
))
1309 fprintf (file
, "h");
1312 fprintf (file
, "b");
1322 case 'J': /* 32 bits, signed */
1323 case 'K': /* 64 bits, signed */
1324 case 'L': /* 128 bits, signed */
1325 if (xcode
== CONST_INT
1326 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1328 gcc_assert (logical_immediate_p (x
, mode
)
1329 || iohl_immediate_p (x
, mode
));
1330 constant_to_array (mode
, x
, arr
);
1331 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1332 val
= trunc_int_for_mode (val
, SImode
);
1333 switch (which_logical_immediate (val
))
1339 val
= trunc_int_for_mode (val
, HImode
);
1342 val
= trunc_int_for_mode (val
, QImode
);
1347 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1353 case 't': /* 128 bits, signed */
1354 case 'd': /* 64 bits, signed */
1355 case 's': /* 32 bits, signed */
1358 enum immediate_class c
= classify_immediate (x
, mode
);
1362 constant_to_array (mode
, x
, arr
);
1363 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1364 val
= trunc_int_for_mode (val
, SImode
);
1365 switch (which_immediate_load (val
))
1370 fprintf (file
, "a");
1373 fprintf (file
, "h");
1376 fprintf (file
, "hu");
1383 constant_to_array (mode
, x
, arr
);
1384 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1386 fprintf (file
, "b");
1388 fprintf (file
, "h");
1390 fprintf (file
, "w");
1392 fprintf (file
, "d");
1395 if (xcode
== CONST_VECTOR
)
1397 x
= CONST_VECTOR_ELT (x
, 0);
1398 xcode
= GET_CODE (x
);
1400 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1401 fprintf (file
, "a");
1402 else if (xcode
== HIGH
)
1403 fprintf (file
, "hu");
1417 case 'T': /* 128 bits, signed */
1418 case 'D': /* 64 bits, signed */
1419 case 'S': /* 32 bits, signed */
1422 enum immediate_class c
= classify_immediate (x
, mode
);
1426 constant_to_array (mode
, x
, arr
);
1427 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1428 val
= trunc_int_for_mode (val
, SImode
);
1429 switch (which_immediate_load (val
))
1436 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1441 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1444 constant_to_array (mode
, x
, arr
);
1446 for (i
= 0; i
< 16; i
++)
1451 print_operand (file
, GEN_INT (val
), 0);
1454 constant_to_array (mode
, x
, arr
);
1455 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1456 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1461 if (GET_CODE (x
) == CONST_VECTOR
)
1462 x
= CONST_VECTOR_ELT (x
, 0);
1463 output_addr_const (file
, x
);
1465 fprintf (file
, "@h");
1479 if (xcode
== CONST_INT
)
1481 /* Only 4 least significant bits are relevant for generate
1482 control word instructions. */
1483 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1488 case 'M': /* print code for c*d */
1489 if (GET_CODE (x
) == CONST_INT
)
1493 fprintf (file
, "b");
1496 fprintf (file
, "h");
1499 fprintf (file
, "w");
1502 fprintf (file
, "d");
1511 case 'N': /* Negate the operand */
1512 if (xcode
== CONST_INT
)
1513 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1514 else if (xcode
== CONST_VECTOR
)
1515 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1516 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1519 case 'I': /* enable/disable interrupts */
1520 if (xcode
== CONST_INT
)
1521 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1524 case 'b': /* branch modifiers */
1526 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1527 else if (COMPARISON_P (x
))
1528 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1531 case 'i': /* indirect call */
1534 if (GET_CODE (XEXP (x
, 0)) == REG
)
1535 /* Used in indirect function calls. */
1536 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1538 output_address (XEXP (x
, 0));
1542 case 'p': /* load/store */
1546 xcode
= GET_CODE (x
);
1551 xcode
= GET_CODE (x
);
1554 fprintf (file
, "d");
1555 else if (xcode
== CONST_INT
)
1556 fprintf (file
, "a");
1557 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1558 fprintf (file
, "r");
1559 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1561 if (GET_CODE (XEXP (x
, 1)) == REG
)
1562 fprintf (file
, "x");
1564 fprintf (file
, "d");
1569 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1571 output_addr_const (file
, GEN_INT (val
));
1575 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1577 output_addr_const (file
, GEN_INT (val
));
1581 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1583 output_addr_const (file
, GEN_INT (val
));
1587 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1588 val
= (val
>> 3) & 0x1f;
1589 output_addr_const (file
, GEN_INT (val
));
1593 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1596 output_addr_const (file
, GEN_INT (val
));
1600 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1603 output_addr_const (file
, GEN_INT (val
));
1607 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1610 output_addr_const (file
, GEN_INT (val
));
1614 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1615 val
= -(val
& -8ll);
1616 val
= (val
>> 3) & 0x1f;
1617 output_addr_const (file
, GEN_INT (val
));
1622 constant_to_array (mode
, x
, arr
);
1623 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1624 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1629 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1630 else if (xcode
== MEM
)
1631 output_address (XEXP (x
, 0));
1632 else if (xcode
== CONST_VECTOR
)
1633 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1635 output_addr_const (file
, x
);
1642 output_operand_lossage ("invalid %%xn code");
1647 extern char call_used_regs
[];
1649 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1650 caller saved register. For leaf functions it is more efficient to
1651 use a volatile register because we won't need to save and restore the
1652 pic register. This routine is only valid after register allocation
1653 is completed, so we can pick an unused register. */
1657 rtx pic_reg
= pic_offset_table_rtx
;
1658 if (!reload_completed
&& !reload_in_progress
)
1663 /* Split constant addresses to handle cases that are too large.
1664 Add in the pic register when in PIC mode.
1665 Split immediates that require more than 1 instruction. */
1667 spu_split_immediate (rtx
* ops
)
1669 enum machine_mode mode
= GET_MODE (ops
[0]);
1670 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1676 unsigned char arrhi
[16];
1677 unsigned char arrlo
[16];
1678 rtx to
, temp
, hi
, lo
;
1680 enum machine_mode imode
= mode
;
1681 /* We need to do reals as ints because the constant used in the
1682 IOR might not be a legitimate real constant. */
1683 imode
= int_mode_for_mode (mode
);
1684 constant_to_array (mode
, ops
[1], arrhi
);
1686 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1689 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1690 for (i
= 0; i
< 16; i
+= 4)
1692 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1693 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1694 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1695 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1697 hi
= array_to_constant (imode
, arrhi
);
1698 lo
= array_to_constant (imode
, arrlo
);
1699 emit_move_insn (temp
, hi
);
1700 emit_insn (gen_rtx_SET
1701 (VOIDmode
, to
, gen_rtx_IOR (imode
, temp
, lo
)));
1706 unsigned char arr_fsmbi
[16];
1707 unsigned char arr_andbi
[16];
1708 rtx to
, reg_fsmbi
, reg_and
;
1710 enum machine_mode imode
= mode
;
1711 /* We need to do reals as ints because the constant used in the
1712 * AND might not be a legitimate real constant. */
1713 imode
= int_mode_for_mode (mode
);
1714 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1716 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1719 for (i
= 0; i
< 16; i
++)
1720 if (arr_fsmbi
[i
] != 0)
1722 arr_andbi
[0] = arr_fsmbi
[i
];
1723 arr_fsmbi
[i
] = 0xff;
1725 for (i
= 1; i
< 16; i
++)
1726 arr_andbi
[i
] = arr_andbi
[0];
1727 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1728 reg_and
= array_to_constant (imode
, arr_andbi
);
1729 emit_move_insn (to
, reg_fsmbi
);
1730 emit_insn (gen_rtx_SET
1731 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1735 if (reload_in_progress
|| reload_completed
)
1737 rtx mem
= force_const_mem (mode
, ops
[1]);
1738 if (TARGET_LARGE_MEM
)
1740 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1741 emit_move_insn (addr
, XEXP (mem
, 0));
1742 mem
= replace_equiv_address (mem
, addr
);
1744 emit_move_insn (ops
[0], mem
);
1750 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1754 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1755 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1758 emit_insn (gen_pic (ops
[0], ops
[1]));
1761 rtx pic_reg
= get_pic_reg ();
1762 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1763 crtl
->uses_pic_offset_table
= 1;
1765 return flag_pic
|| c
== IC_IL2s
;
1776 /* SAVING is TRUE when we are generating the actual load and store
1777 instructions for REGNO. When determining the size of the stack
1778 needed for saving register we must allocate enough space for the
1779 worst case, because we don't always have the information early enough
1780 to not allocate it. But we can at least eliminate the actual loads
1781 and stores during the prologue/epilogue. */
1783 need_to_save_reg (int regno
, int saving
)
1785 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1788 && regno
== PIC_OFFSET_TABLE_REGNUM
1789 && (!saving
|| crtl
->uses_pic_offset_table
)
1791 || !current_function_is_leaf
|| df_regs_ever_live_p (LAST_ARG_REGNUM
)))
1796 /* This function is only correct starting with local register
1799 spu_saved_regs_size (void)
1801 int reg_save_size
= 0;
1804 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1805 if (need_to_save_reg (regno
, 0))
1806 reg_save_size
+= 0x10;
1807 return reg_save_size
;
1811 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1813 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1815 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1816 return emit_insn (gen_movv4si (mem
, reg
));
1820 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1822 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1824 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1825 return emit_insn (gen_movv4si (reg
, mem
));
1828 /* This happens after reload, so we need to expand it. */
1830 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1833 if (satisfies_constraint_K (GEN_INT (imm
)))
1835 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1839 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1840 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1841 if (REGNO (src
) == REGNO (scratch
))
1847 /* Return nonzero if this function is known to have a null epilogue. */
1850 direct_return (void)
1852 if (reload_completed
)
1854 if (cfun
->static_chain_decl
== 0
1855 && (spu_saved_regs_size ()
1857 + crtl
->outgoing_args_size
1858 + crtl
->args
.pretend_args_size
== 0)
1859 && current_function_is_leaf
)
1866 The stack frame looks like this:
1870 AP -> +-------------+
1873 prev SP | back chain |
1876 | reg save | crtl->args.pretend_args_size bytes
1879 | saved regs | spu_saved_regs_size() bytes
1880 FP -> +-------------+
1882 | vars | get_frame_size() bytes
1883 HFP -> +-------------+
1886 | args | crtl->outgoing_args_size bytes
1892 SP -> +-------------+
1896 spu_expand_prologue (void)
1898 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1899 HOST_WIDE_INT total_size
;
1900 HOST_WIDE_INT saved_regs_size
;
1901 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1902 rtx scratch_reg_0
, scratch_reg_1
;
1905 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1906 the "toplevel" insn chain. */
1907 emit_note (NOTE_INSN_DELETED
);
1909 if (flag_pic
&& optimize
== 0)
1910 crtl
->uses_pic_offset_table
= 1;
1912 if (spu_naked_function_p (current_function_decl
))
1915 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1916 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1918 saved_regs_size
= spu_saved_regs_size ();
1919 total_size
= size
+ saved_regs_size
1920 + crtl
->outgoing_args_size
1921 + crtl
->args
.pretend_args_size
;
1923 if (!current_function_is_leaf
1924 || cfun
->calls_alloca
|| total_size
> 0)
1925 total_size
+= STACK_POINTER_OFFSET
;
1927 /* Save this first because code after this might use the link
1928 register as a scratch register. */
1929 if (!current_function_is_leaf
)
1931 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1932 RTX_FRAME_RELATED_P (insn
) = 1;
1937 offset
= -crtl
->args
.pretend_args_size
;
1938 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1939 if (need_to_save_reg (regno
, 1))
1942 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1943 RTX_FRAME_RELATED_P (insn
) = 1;
1947 if (flag_pic
&& crtl
->uses_pic_offset_table
)
1949 rtx pic_reg
= get_pic_reg ();
1950 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1951 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1956 if (flag_stack_check
)
1958 /* We compare against total_size-1 because
1959 ($sp >= total_size) <=> ($sp > total_size-1) */
1960 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1961 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1962 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1963 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1965 emit_move_insn (scratch_v4si
, size_v4si
);
1966 size_v4si
= scratch_v4si
;
1968 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1969 emit_insn (gen_vec_extractv4si
1970 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1971 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1974 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1975 the value of the previous $sp because we save it as the back
1977 if (total_size
<= 2000)
1979 /* In this case we save the back chain first. */
1980 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1982 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1986 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1988 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1990 RTX_FRAME_RELATED_P (insn
) = 1;
1991 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1992 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1994 if (total_size
> 2000)
1996 /* Save the back chain ptr */
1997 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
2000 if (frame_pointer_needed
)
2002 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
2003 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
2004 + crtl
->outgoing_args_size
;
2005 /* Set the new frame_pointer */
2006 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
2007 RTX_FRAME_RELATED_P (insn
) = 1;
2008 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
2009 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
2010 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
2014 emit_note (NOTE_INSN_DELETED
);
2018 spu_expand_epilogue (bool sibcall_p
)
2020 int size
= get_frame_size (), offset
, regno
;
2021 HOST_WIDE_INT saved_regs_size
, total_size
;
2022 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
2023 rtx jump
, scratch_reg_0
;
2025 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2026 the "toplevel" insn chain. */
2027 emit_note (NOTE_INSN_DELETED
);
2029 if (spu_naked_function_p (current_function_decl
))
2032 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
2034 saved_regs_size
= spu_saved_regs_size ();
2035 total_size
= size
+ saved_regs_size
2036 + crtl
->outgoing_args_size
2037 + crtl
->args
.pretend_args_size
;
2039 if (!current_function_is_leaf
2040 || cfun
->calls_alloca
|| total_size
> 0)
2041 total_size
+= STACK_POINTER_OFFSET
;
2045 if (cfun
->calls_alloca
)
2046 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
2048 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
2051 if (saved_regs_size
> 0)
2053 offset
= -crtl
->args
.pretend_args_size
;
2054 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
2055 if (need_to_save_reg (regno
, 1))
2058 frame_emit_load (regno
, sp_reg
, offset
);
2063 if (!current_function_is_leaf
)
2064 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
2068 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
2069 jump
= emit_jump_insn (gen__return ());
2070 emit_barrier_after (jump
);
2073 emit_note (NOTE_INSN_DELETED
);
2077 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
2081 /* This is inefficient because it ends up copying to a save-register
2082 which then gets saved even though $lr has already been saved. But
2083 it does generate better code for leaf functions and we don't need
2084 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2085 used for __builtin_return_address anyway, so maybe we don't care if
2086 it's inefficient. */
2087 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
2091 /* Given VAL, generate a constant appropriate for MODE.
2092 If MODE is a vector mode, every element will be VAL.
2093 For TImode, VAL will be zero extended to 128 bits. */
2095 spu_const (enum machine_mode mode
, HOST_WIDE_INT val
)
2101 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
2102 || GET_MODE_CLASS (mode
) == MODE_FLOAT
2103 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
2104 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
2106 if (GET_MODE_CLASS (mode
) == MODE_INT
)
2107 return immed_double_const (val
, 0, mode
);
2109 /* val is the bit representation of the float */
2110 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
2111 return hwint_to_const_double (mode
, val
);
2113 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
2114 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
2116 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
2118 units
= GET_MODE_NUNITS (mode
);
2120 v
= rtvec_alloc (units
);
2122 for (i
= 0; i
< units
; ++i
)
2123 RTVEC_ELT (v
, i
) = inner
;
2125 return gen_rtx_CONST_VECTOR (mode
, v
);
2128 /* Create a MODE vector constant from 4 ints. */
2130 spu_const_from_ints(enum machine_mode mode
, int a
, int b
, int c
, int d
)
2132 unsigned char arr
[16];
2133 arr
[0] = (a
>> 24) & 0xff;
2134 arr
[1] = (a
>> 16) & 0xff;
2135 arr
[2] = (a
>> 8) & 0xff;
2136 arr
[3] = (a
>> 0) & 0xff;
2137 arr
[4] = (b
>> 24) & 0xff;
2138 arr
[5] = (b
>> 16) & 0xff;
2139 arr
[6] = (b
>> 8) & 0xff;
2140 arr
[7] = (b
>> 0) & 0xff;
2141 arr
[8] = (c
>> 24) & 0xff;
2142 arr
[9] = (c
>> 16) & 0xff;
2143 arr
[10] = (c
>> 8) & 0xff;
2144 arr
[11] = (c
>> 0) & 0xff;
2145 arr
[12] = (d
>> 24) & 0xff;
2146 arr
[13] = (d
>> 16) & 0xff;
2147 arr
[14] = (d
>> 8) & 0xff;
2148 arr
[15] = (d
>> 0) & 0xff;
2149 return array_to_constant(mode
, arr
);
2152 /* branch hint stuff */
2154 /* An array of these is used to propagate hints to predecessor blocks. */
2157 rtx prop_jump
; /* propagated from another block */
2158 int bb_index
; /* the original block. */
2160 static struct spu_bb_info
*spu_bb_info
;
2162 #define STOP_HINT_P(INSN) \
2163 (GET_CODE(INSN) == CALL_INSN \
2164 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2165 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2167 /* 1 when RTX is a hinted branch or its target. We keep track of
2168 what has been hinted so the safe-hint code can test it easily. */
2169 #define HINTED_P(RTX) \
2170 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2172 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2173 #define SCHED_ON_EVEN_P(RTX) \
2174 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2176 /* Emit a nop for INSN such that the two will dual issue. This assumes
2177 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2178 We check for TImode to handle a MULTI1 insn which has dual issued its
2179 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2182 emit_nop_for_insn (rtx insn
)
2186 p
= get_pipe (insn
);
2187 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2188 new_insn
= emit_insn_after (gen_lnop (), insn
);
2189 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2191 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2192 PUT_MODE (new_insn
, TImode
);
2193 PUT_MODE (insn
, VOIDmode
);
2196 new_insn
= emit_insn_after (gen_lnop (), insn
);
2197 recog_memoized (new_insn
);
2200 /* Insert nops in basic blocks to meet dual issue alignment
2201 requirements. Also make sure hbrp and hint instructions are at least
2202 one cycle apart, possibly inserting a nop. */
2206 rtx insn
, next_insn
, prev_insn
, hbr_insn
= 0;
2210 /* This sets up INSN_ADDRESSES. */
2211 shorten_branches (get_insns ());
2213 /* Keep track of length added by nops. */
2217 insn
= get_insns ();
2218 if (!active_insn_p (insn
))
2219 insn
= next_active_insn (insn
);
2220 for (; insn
; insn
= next_insn
)
2222 next_insn
= next_active_insn (insn
);
2223 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2224 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2228 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2229 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2230 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2233 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2234 PUT_MODE (prev_insn
, GET_MODE (insn
));
2235 PUT_MODE (insn
, TImode
);
2241 if (INSN_CODE (insn
) == CODE_FOR_blockage
)
2243 if (GET_MODE (insn
) == TImode
)
2244 PUT_MODE (next_insn
, TImode
);
2246 next_insn
= next_active_insn (insn
);
2248 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2249 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2251 if (((addr
+ length
) & 7) != 0)
2253 emit_nop_for_insn (prev_insn
);
2257 else if (GET_MODE (insn
) == TImode
2258 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2259 || get_attr_type (insn
) == TYPE_MULTI0
)
2260 && ((addr
+ length
) & 7) != 0)
2262 /* prev_insn will always be set because the first insn is
2263 always 8-byte aligned. */
2264 emit_nop_for_insn (prev_insn
);
2272 /* Routines for branch hints. */
2275 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
2276 int distance
, sbitmap blocks
)
2278 rtx branch_label
= 0;
2283 if (before
== 0 || branch
== 0 || target
== 0)
2286 /* While scheduling we require hints to be no further than 600, so
2287 we need to enforce that here too */
2291 /* If we have a Basic block note, emit it after the basic block note. */
2292 if (NOTE_KIND (before
) == NOTE_INSN_BASIC_BLOCK
)
2293 before
= NEXT_INSN (before
);
2295 branch_label
= gen_label_rtx ();
2296 LABEL_NUSES (branch_label
)++;
2297 LABEL_PRESERVE_P (branch_label
) = 1;
2298 insn
= emit_label_before (branch_label
, branch
);
2299 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2300 SET_BIT (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2302 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2303 recog_memoized (hint
);
2304 HINTED_P (branch
) = 1;
2306 if (GET_CODE (target
) == LABEL_REF
)
2307 HINTED_P (XEXP (target
, 0)) = 1;
2308 else if (tablejump_p (branch
, 0, &table
))
2312 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2313 vec
= XVEC (PATTERN (table
), 0);
2315 vec
= XVEC (PATTERN (table
), 1);
2316 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2317 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2320 if (distance
>= 588)
2322 /* Make sure the hint isn't scheduled any earlier than this point,
2323 which could make it too far for the branch offest to fit */
2324 recog_memoized (emit_insn_before (gen_blockage (), hint
));
2326 else if (distance
<= 8 * 4)
2328 /* To guarantee at least 8 insns between the hint and branch we
2331 for (d
= distance
; d
< 8 * 4; d
+= 4)
2334 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2335 recog_memoized (insn
);
2338 /* Make sure any nops inserted aren't scheduled before the hint. */
2339 recog_memoized (emit_insn_after (gen_blockage (), hint
));
2341 /* Make sure any nops inserted aren't scheduled after the call. */
2342 if (CALL_P (branch
) && distance
< 8 * 4)
2343 recog_memoized (emit_insn_before (gen_blockage (), branch
));
2347 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2348 the rtx for the branch target. */
2350 get_branch_target (rtx branch
)
2352 if (GET_CODE (branch
) == JUMP_INSN
)
2356 /* Return statements */
2357 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2358 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2361 if (GET_CODE (PATTERN (branch
)) == ADDR_VEC
2362 || GET_CODE (PATTERN (branch
)) == ADDR_DIFF_VEC
)
2365 set
= single_set (branch
);
2366 src
= SET_SRC (set
);
2367 if (GET_CODE (SET_DEST (set
)) != PC
)
2370 if (GET_CODE (src
) == IF_THEN_ELSE
)
2373 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2376 /* If the more probable case is not a fall through, then
2377 try a branch hint. */
2378 HOST_WIDE_INT prob
= INTVAL (XEXP (note
, 0));
2379 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2380 && GET_CODE (XEXP (src
, 1)) != PC
)
2381 lab
= XEXP (src
, 1);
2382 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2383 && GET_CODE (XEXP (src
, 2)) != PC
)
2384 lab
= XEXP (src
, 2);
2388 if (GET_CODE (lab
) == RETURN
)
2389 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2397 else if (GET_CODE (branch
) == CALL_INSN
)
2400 /* All of our call patterns are in a PARALLEL and the CALL is
2401 the first pattern in the PARALLEL. */
2402 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2404 call
= XVECEXP (PATTERN (branch
), 0, 0);
2405 if (GET_CODE (call
) == SET
)
2406 call
= SET_SRC (call
);
2407 if (GET_CODE (call
) != CALL
)
2409 return XEXP (XEXP (call
, 0), 0);
2414 /* The special $hbr register is used to prevent the insn scheduler from
2415 moving hbr insns across instructions which invalidate them. It
2416 should only be used in a clobber, and this function searches for
2417 insns which clobber it. */
2419 insn_clobbers_hbr (rtx insn
)
2422 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2424 rtx parallel
= PATTERN (insn
);
2427 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2429 clobber
= XVECEXP (parallel
, 0, j
);
2430 if (GET_CODE (clobber
) == CLOBBER
2431 && GET_CODE (XEXP (clobber
, 0)) == REG
2432 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2439 /* Search up to 32 insns starting at FIRST:
2440 - at any kind of hinted branch, just return
2441 - at any unconditional branch in the first 15 insns, just return
2442 - at a call or indirect branch, after the first 15 insns, force it to
2443 an even address and return
2444 - at any unconditional branch, after the first 15 insns, force it to
2446 At then end of the search, insert an hbrp within 4 insns of FIRST,
2447 and an hbrp within 16 instructions of FIRST.
2450 insert_hbrp_for_ilb_runout (rtx first
)
2452 rtx insn
, before_4
= 0, before_16
= 0;
2453 int addr
= 0, length
, first_addr
= -1;
2454 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2455 int insert_lnop_after
= 0;
2456 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2459 if (first_addr
== -1)
2460 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2461 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2462 length
= get_attr_length (insn
);
2464 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2466 /* We test for 14 instructions because the first hbrp will add
2467 up to 2 instructions. */
2468 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2471 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2473 /* Make sure an hbrp is at least 2 cycles away from a hint.
2474 Insert an lnop after the hbrp when necessary. */
2475 if (before_4
== 0 && addr
> 0)
2478 insert_lnop_after
|= 1;
2480 else if (before_4
&& addr
<= 4 * 4)
2481 insert_lnop_after
|= 1;
2482 if (before_16
== 0 && addr
> 10 * 4)
2485 insert_lnop_after
|= 2;
2487 else if (before_16
&& addr
<= 14 * 4)
2488 insert_lnop_after
|= 2;
2491 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2493 if (addr
< hbrp_addr0
)
2495 else if (addr
< hbrp_addr1
)
2499 if (CALL_P (insn
) || JUMP_P (insn
))
2501 if (HINTED_P (insn
))
2504 /* Any branch after the first 15 insns should be on an even
2505 address to avoid a special case branch. There might be
2506 some nops and/or hbrps inserted, so we test after 10
2509 SCHED_ON_EVEN_P (insn
) = 1;
2512 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2516 if (addr
+ length
>= 32 * 4)
2518 gcc_assert (before_4
&& before_16
);
2519 if (hbrp_addr0
> 4 * 4)
2522 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2523 recog_memoized (insn
);
2524 INSN_ADDRESSES_NEW (insn
,
2525 INSN_ADDRESSES (INSN_UID (before_4
)));
2526 PUT_MODE (insn
, GET_MODE (before_4
));
2527 PUT_MODE (before_4
, TImode
);
2528 if (insert_lnop_after
& 1)
2530 insn
= emit_insn_before (gen_lnop (), before_4
);
2531 recog_memoized (insn
);
2532 INSN_ADDRESSES_NEW (insn
,
2533 INSN_ADDRESSES (INSN_UID (before_4
)));
2534 PUT_MODE (insn
, TImode
);
2537 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2538 && hbrp_addr1
> 16 * 4)
2541 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2542 recog_memoized (insn
);
2543 INSN_ADDRESSES_NEW (insn
,
2544 INSN_ADDRESSES (INSN_UID (before_16
)));
2545 PUT_MODE (insn
, GET_MODE (before_16
));
2546 PUT_MODE (before_16
, TImode
);
2547 if (insert_lnop_after
& 2)
2549 insn
= emit_insn_before (gen_lnop (), before_16
);
2550 recog_memoized (insn
);
2551 INSN_ADDRESSES_NEW (insn
,
2552 INSN_ADDRESSES (INSN_UID
2554 PUT_MODE (insn
, TImode
);
2560 else if (BARRIER_P (insn
))
2565 /* The SPU might hang when it executes 48 inline instructions after a
2566 hinted branch jumps to its hinted target. The beginning of a
2567 function and the return from a call might have been hinted, and must
2568 be handled as well. To prevent a hang we insert 2 hbrps. The first
2569 should be within 6 insns of the branch target. The second should be
2570 within 22 insns of the branch target. When determining if hbrps are
2571 necessary, we look for only 32 inline instructions, because up to to
2572 12 nops and 4 hbrps could be inserted. Similarily, when inserting
2573 new hbrps, we insert them within 4 and 16 insns of the target. */
2578 if (TARGET_SAFE_HINTS
)
2580 shorten_branches (get_insns ());
2581 /* Insert hbrp at beginning of function */
2582 insn
= next_active_insn (get_insns ());
2584 insert_hbrp_for_ilb_runout (insn
);
2585 /* Insert hbrp after hinted targets. */
2586 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2587 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2588 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2592 static int in_spu_reorg
;
2594 /* Insert branch hints. There are no branch optimizations after this
2595 pass, so it's safe to set our branch hints now. */
2597 spu_machine_dependent_reorg (void)
2602 rtx branch_target
= 0;
2603 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2607 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2609 /* We still do it for unoptimized code because an external
2610 function might have hinted a call or return. */
2616 blocks
= sbitmap_alloc (last_basic_block
);
2617 sbitmap_zero (blocks
);
2620 compute_bb_for_insn ();
2625 (struct spu_bb_info
*) xcalloc (n_basic_blocks
,
2626 sizeof (struct spu_bb_info
));
2628 /* We need exact insn addresses and lengths. */
2629 shorten_branches (get_insns ());
2631 for (i
= n_basic_blocks
- 1; i
>= 0; i
--)
2633 bb
= BASIC_BLOCK (i
);
2635 if (spu_bb_info
[i
].prop_jump
)
2637 branch
= spu_bb_info
[i
].prop_jump
;
2638 branch_target
= get_branch_target (branch
);
2639 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2640 required_dist
= spu_hint_dist
;
2642 /* Search from end of a block to beginning. In this loop, find
2643 jumps which need a branch and emit them only when:
2644 - it's an indirect branch and we're at the insn which sets
2646 - we're at an insn that will invalidate the hint. e.g., a
2647 call, another hint insn, inline asm that clobbers $hbr, and
2648 some inlined operations (divmodsi4). Don't consider jumps
2649 because they are only at the end of a block and are
2650 considered when we are deciding whether to propagate
2651 - we're getting too far away from the branch. The hbr insns
2652 only have a signed 10 bit offset
2653 We go back as far as possible so the branch will be considered
2654 for propagation when we get to the beginning of the block. */
2655 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2659 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2661 && ((GET_CODE (branch_target
) == REG
2662 && set_of (branch_target
, insn
) != NULL_RTX
)
2663 || insn_clobbers_hbr (insn
)
2664 || branch_addr
- insn_addr
> 600))
2666 rtx next
= NEXT_INSN (insn
);
2667 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2668 if (insn
!= BB_END (bb
)
2669 && branch_addr
- next_addr
>= required_dist
)
2673 "hint for %i in block %i before %i\n",
2674 INSN_UID (branch
), bb
->index
,
2676 spu_emit_branch_hint (next
, branch
, branch_target
,
2677 branch_addr
- next_addr
, blocks
);
2682 /* JUMP_P will only be true at the end of a block. When
2683 branch is already set it means we've previously decided
2684 to propagate a hint for that branch into this block. */
2685 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2688 if ((branch_target
= get_branch_target (insn
)))
2691 branch_addr
= insn_addr
;
2692 required_dist
= spu_hint_dist
;
2696 if (insn
== BB_HEAD (bb
))
2702 /* If we haven't emitted a hint for this branch yet, it might
2703 be profitable to emit it in one of the predecessor blocks,
2704 especially for loops. */
2706 basic_block prev
= 0, prop
= 0, prev2
= 0;
2707 int loop_exit
= 0, simple_loop
= 0;
2708 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2710 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2711 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2712 prev
= EDGE_PRED (bb
, j
)->src
;
2714 prev2
= EDGE_PRED (bb
, j
)->src
;
2716 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2717 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2719 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2722 /* If this branch is a loop exit then propagate to previous
2723 fallthru block. This catches the cases when it is a simple
2724 loop or when there is an initial branch into the loop. */
2725 if (prev
&& (loop_exit
|| simple_loop
)
2726 && prev
->loop_depth
<= bb
->loop_depth
)
2729 /* If there is only one adjacent predecessor. Don't propagate
2730 outside this loop. This loop_depth test isn't perfect, but
2731 I'm not sure the loop_father member is valid at this point. */
2732 else if (prev
&& single_pred_p (bb
)
2733 && prev
->loop_depth
== bb
->loop_depth
)
2736 /* If this is the JOIN block of a simple IF-THEN then
2737 propogate the hint to the HEADER block. */
2738 else if (prev
&& prev2
2739 && EDGE_COUNT (bb
->preds
) == 2
2740 && EDGE_COUNT (prev
->preds
) == 1
2741 && EDGE_PRED (prev
, 0)->src
== prev2
2742 && prev2
->loop_depth
== bb
->loop_depth
2743 && GET_CODE (branch_target
) != REG
)
2746 /* Don't propagate when:
2747 - this is a simple loop and the hint would be too far
2748 - this is not a simple loop and there are 16 insns in
2750 - the predecessor block ends in a branch that will be
2752 - the predecessor block ends in an insn that invalidates
2756 && (bbend
= BB_END (prop
))
2757 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2758 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2759 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2762 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2763 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2764 bb
->index
, prop
->index
, bb
->loop_depth
,
2765 INSN_UID (branch
), loop_exit
, simple_loop
,
2766 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2768 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2769 spu_bb_info
[prop
->index
].bb_index
= i
;
2771 else if (branch_addr
- next_addr
>= required_dist
)
2774 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2775 INSN_UID (branch
), bb
->index
,
2776 INSN_UID (NEXT_INSN (insn
)));
2777 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2778 branch_addr
- next_addr
, blocks
);
2785 if (!sbitmap_empty_p (blocks
))
2786 find_many_sub_basic_blocks (blocks
);
2788 /* We have to schedule to make sure alignment is ok. */
2789 FOR_EACH_BB (bb
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2791 /* The hints need to be scheduled, so call it again. */
2798 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2799 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2801 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2802 between its branch label and the branch . We don't move the
2803 label because GCC expects it at the beginning of the block. */
2804 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2805 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2806 rtx label
= XEXP (label_ref
, 0);
2809 for (branch
= NEXT_INSN (label
);
2810 !JUMP_P (branch
) && !CALL_P (branch
);
2811 branch
= NEXT_INSN (branch
))
2812 if (NONJUMP_INSN_P (branch
))
2813 offset
+= get_attr_length (branch
);
2815 XVECEXP (unspec
, 0, 0) = plus_constant (label_ref
, offset
);
2818 if (spu_flag_var_tracking
)
2821 timevar_push (TV_VAR_TRACKING
);
2822 variable_tracking_main ();
2823 timevar_pop (TV_VAR_TRACKING
);
2824 df_finish_pass (false);
2827 free_bb_for_insn ();
2833 /* Insn scheduling routines, primarily for dual issue. */
2835 spu_sched_issue_rate (void)
/* Return nonzero when INSN is a single_set whose source or destination
   is a MEM, i.e. the insn will occupy the SPU load/store unit.
   NOTE(review): this extraction dropped the null check on SET and the
   1/0 return lines; they are reconstructed here -- confirm against the
   repository.  */
static int
uses_ls_unit(rtx insn)
{
  rtx set = single_set (insn);
  if (set != 0
      && (GET_CODE (SET_DEST (set)) == MEM
	  || GET_CODE (SET_SRC (set)) == MEM))
    return 1;
  return 0;
}
2855 /* Handle inline asm */
2856 if (INSN_CODE (insn
) == -1)
2858 t
= get_attr_type (insn
);
2883 case TYPE_IPREFETCH
:
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock;
static int pipe1_clock;

/* Cycle of the most recently issued insn; compared against the current
   clock to detect when two insns issue in the same cycle (dual issue)
   and when nop padding would be needed.  */
static int prev_clock_var;

/* INSN_PRIORITY of the most recently issued insn; consulted when
   deciding whether inserting nops for dual issue is worthwhile.  */
static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  spu_ls_first records the byte offset of the first
   insn of the current run of load/stores; prev_ls_clock the cycle of
   the last load/store issued.  */
static int spu_ls_first;
static int prev_ls_clock;
/* TARGET_SCHED_INIT_GLOBAL hook: run once before scheduling the whole
   function.  Reset the running byte-length counter used to track insn
   alignment across blocks.
   NOTE(review): the return-type line is not visible in this extraction;
   `static void' assumed from the scheduler-hook signature -- confirm.  */
static void
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		       int max_ready ATTRIBUTE_UNUSED)
{
  spu_sched_length = 0;
}
2927 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2928 int max_ready ATTRIBUTE_UNUSED
)
2930 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2932 /* When any block might be at least 8-byte aligned, assume they
2933 will all be at least 8-byte aligned to make sure dual issue
2934 works out correctly. */
2935 spu_sched_length
= 0;
2937 spu_ls_first
= INT_MAX
;
2942 prev_clock_var
= -1;
2947 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2948 int verbose ATTRIBUTE_UNUSED
, rtx insn
, int more
)
2952 if (GET_CODE (PATTERN (insn
)) == USE
2953 || GET_CODE (PATTERN (insn
)) == CLOBBER
2954 || (len
= get_attr_length (insn
)) == 0)
2957 spu_sched_length
+= len
;
2959 /* Reset on inline asm */
2960 if (INSN_CODE (insn
) == -1)
2962 spu_ls_first
= INT_MAX
;
2967 p
= get_pipe (insn
);
2969 pipe0_clock
= clock_var
;
2971 pipe1_clock
= clock_var
;
2975 if (clock_var
- prev_ls_clock
> 1
2976 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2977 spu_ls_first
= INT_MAX
;
2978 if (uses_ls_unit (insn
))
2980 if (spu_ls_first
== INT_MAX
)
2981 spu_ls_first
= spu_sched_length
;
2982 prev_ls_clock
= clock_var
;
2985 /* The scheduler hasn't inserted the nop, but we will later on.
2986 Include those nops in spu_sched_length. */
2987 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2988 spu_sched_length
+= 4;
2989 prev_clock_var
= clock_var
;
2991 /* more is -1 when called from spu_sched_reorder for new insns
2992 that don't have INSN_PRIORITY */
2994 prev_priority
= INSN_PRIORITY (insn
);
2997 /* Always try issueing more insns. spu_sched_reorder will decide
2998 when the cycle should be advanced. */
3002 /* This function is called for both TARGET_SCHED_REORDER and
3003 TARGET_SCHED_REORDER2. */
3005 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
3006 rtx
*ready
, int *nreadyp
, int clock
)
3008 int i
, nready
= *nreadyp
;
3009 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
3014 if (nready
<= 0 || pipe1_clock
>= clock
)
3017 /* Find any rtl insns that don't generate assembly insns and schedule
3019 for (i
= nready
- 1; i
>= 0; i
--)
3022 if (INSN_CODE (insn
) == -1
3023 || INSN_CODE (insn
) == CODE_FOR_blockage
3024 || (INSN_P (insn
) && get_attr_length (insn
) == 0))
3026 ready
[i
] = ready
[nready
- 1];
3027 ready
[nready
- 1] = insn
;
3032 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
3033 for (i
= 0; i
< nready
; i
++)
3034 if (INSN_CODE (ready
[i
]) != -1)
3037 switch (get_attr_type (insn
))
3062 case TYPE_IPREFETCH
:
3068 /* In the first scheduling phase, schedule loads and stores together
3069 to increase the chance they will get merged during postreload CSE. */
3070 if (!reload_completed
&& pipe_ls
>= 0)
3072 insn
= ready
[pipe_ls
];
3073 ready
[pipe_ls
] = ready
[nready
- 1];
3074 ready
[nready
- 1] = insn
;
3078 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3082 /* When we have loads/stores in every cycle of the last 15 insns and
3083 we are about to schedule another load/store, emit an hbrp insn
3086 && spu_sched_length
- spu_ls_first
>= 4 * 15
3087 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
3089 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3090 recog_memoized (insn
);
3091 if (pipe0_clock
< clock
)
3092 PUT_MODE (insn
, TImode
);
3093 spu_sched_variable_issue (file
, verbose
, insn
, -1);
3097 /* In general, we want to emit nops to increase dual issue, but dual
3098 issue isn't faster when one of the insns could be scheduled later
3099 without effecting the critical path. We look at INSN_PRIORITY to
3100 make a good guess, but it isn't perfect so -mdual-nops=n can be
3101 used to effect it. */
3102 if (in_spu_reorg
&& spu_dual_nops
< 10)
3104 /* When we are at an even address and we are not issueing nops to
3105 improve scheduling then we need to advance the cycle. */
3106 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
3107 && (spu_dual_nops
== 0
3110 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
3113 /* When at an odd address, schedule the highest priority insn
3114 without considering pipeline. */
3115 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
3116 && (spu_dual_nops
== 0
3118 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
3123 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3124 pipe0 insn in the ready list, schedule it. */
3125 if (pipe0_clock
< clock
&& pipe_0
>= 0)
3126 schedule_i
= pipe_0
;
3128 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3129 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3131 schedule_i
= pipe_1
;
3133 if (schedule_i
> -1)
3135 insn
= ready
[schedule_i
];
3136 ready
[schedule_i
] = ready
[nready
- 1];
3137 ready
[nready
- 1] = insn
;
3143 /* INSN is dependent on DEP_INSN. */
3145 spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
3149 /* The blockage pattern is used to prevent instructions from being
3150 moved across it and has no cost. */
3151 if (INSN_CODE (insn
) == CODE_FOR_blockage
3152 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3155 if ((INSN_P (insn
) && get_attr_length (insn
) == 0)
3156 || (INSN_P (dep_insn
) && get_attr_length (dep_insn
) == 0))
3159 /* Make sure hbrps are spread out. */
3160 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3161 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3164 /* Make sure hints and hbrps are 2 cycles apart. */
3165 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3166 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3167 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3168 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3171 /* An hbrp has no real dependency on other insns. */
3172 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3173 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3176 /* Assuming that it is unlikely an argument register will be used in
3177 the first cycle of the called function, we reduce the cost for
3178 slightly better scheduling of dep_insn. When not hinted, the
3179 mispredicted branch would hide the cost as well. */
3182 rtx target
= get_branch_target (insn
);
3183 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3188 /* And when returning from a function, let's assume the return values
3189 are completed sooner too. */
3190 if (CALL_P (dep_insn
))
3193 /* Make sure an instruction that loads from the back chain is schedule
3194 away from the return instruction so a hint is more likely to get
3196 if (INSN_CODE (insn
) == CODE_FOR__return
3197 && (set
= single_set (dep_insn
))
3198 && GET_CODE (SET_DEST (set
)) == REG
3199 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3202 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3203 scheduler makes every insn in a block anti-dependent on the final
3204 jump_insn. We adjust here so higher cost insns will get scheduled
3206 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3207 return insn_cost (dep_insn
) - 3;
/* Create a CONST_DOUBLE from a string.  STRING is parsed as a floating
   point literal in MODE's format and the resulting value is wrapped in
   a CONST_DOUBLE rtx of MODE.  */
rtx
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}
3222 spu_constant_address_p (rtx x
)
3224 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
3225 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
3226 || GET_CODE (x
) == HIGH
);
3229 static enum spu_immediate
3230 which_immediate_load (HOST_WIDE_INT val
)
3232 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3234 if (val
>= -0x8000 && val
<= 0x7fff)
3236 if (val
>= 0 && val
<= 0x3ffff)
3238 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3240 if ((val
& 0xffff) == 0)
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl.  */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      /* classify_immediate decides which immediate-load sequence can
	 materialize OP.  */
      enum immediate_class c = classify_immediate (op, mode);
      /* IC_IL1/IC_IL1s need a single il-family insn; IC_IL2/IC_IL2s
	 need the two-insn ilhu+iohl pair, which is only acceptable
	 before epilogue_completed (i.e. before flow2).  */
      return c == IC_IL1 || c == IC_IL1s
	|| (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  /* NOTE(review): the final return for non-constant OP was dropped by
     this extraction; `return 0;' reconstructed -- confirm.  */
  return 0;
}
3260 /* Return true if the first SIZE bytes of arr is a constant that can be
3261 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3262 represent the size and offset of the instruction to use. */
3264 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3266 int cpat
, run
, i
, start
;
3270 for (i
= 0; i
< size
&& cpat
; i
++)
3278 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3280 else if (arr
[i
] == 0)
3282 while (arr
[i
+run
] == run
&& i
+run
< 16)
3284 if (run
!= 4 && run
!= 8)
3289 if ((i
& (run
-1)) != 0)
3296 if (cpat
&& (run
|| size
< 16))
3303 *pstart
= start
== -1 ? 16-run
: start
;
3309 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3310 it into a register. MODE is only valid when OP is a CONST_INT. */
3311 static enum immediate_class
3312 classify_immediate (rtx op
, enum machine_mode mode
)
3315 unsigned char arr
[16];
3316 int i
, j
, repeated
, fsmbi
, repeat
;
3318 gcc_assert (CONSTANT_P (op
));
3320 if (GET_MODE (op
) != VOIDmode
)
3321 mode
= GET_MODE (op
);
3323 /* A V4SI const_vector with all identical symbols is ok. */
3326 && GET_CODE (op
) == CONST_VECTOR
3327 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3328 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3329 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3330 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3331 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3332 op
= CONST_VECTOR_ELT (op
, 0);
3334 switch (GET_CODE (op
))
3338 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3341 /* We can never know if the resulting address fits in 18 bits and can be
3342 loaded with ila. For now, assume the address will not overflow if
3343 the displacement is "small" (fits 'K' constraint). */
3344 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3346 rtx sym
= XEXP (XEXP (op
, 0), 0);
3347 rtx cst
= XEXP (XEXP (op
, 0), 1);
3349 if (GET_CODE (sym
) == SYMBOL_REF
3350 && GET_CODE (cst
) == CONST_INT
3351 && satisfies_constraint_K (cst
))
3360 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3361 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3362 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3368 constant_to_array (mode
, op
, arr
);
3370 /* Check that each 4-byte slot is identical. */
3372 for (i
= 4; i
< 16; i
+= 4)
3373 for (j
= 0; j
< 4; j
++)
3374 if (arr
[j
] != arr
[i
+ j
])
3379 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3380 val
= trunc_int_for_mode (val
, SImode
);
3382 if (which_immediate_load (val
) != SPU_NONE
)
3386 /* Any mode of 2 bytes or smaller can be loaded with an il
3388 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3392 for (i
= 0; i
< 16 && fsmbi
; i
++)
3393 if (arr
[i
] != 0 && repeat
== 0)
3395 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3398 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3400 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
3413 static enum spu_immediate
3414 which_logical_immediate (HOST_WIDE_INT val
)
3416 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3418 if (val
>= -0x200 && val
<= 0x1ff)
3420 if (val
>= 0 && val
<= 0xffff)
3422 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3424 val
= trunc_int_for_mode (val
, HImode
);
3425 if (val
>= -0x200 && val
<= 0x1ff)
3427 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3429 val
= trunc_int_for_mode (val
, QImode
);
3430 if (val
>= -0x200 && val
<= 0x1ff)
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs.
   NOTE(review): the extraction dropped the `int i;' declaration and
   the 0/1 return lines; reconstructed from the stated contract --
   confirm against the repository.  */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      /* Any symbolic or otherwise non-numeric element disqualifies
	 the whole vector.  */
      return 0;
  return 1;
}
/* Return TRUE if OP (a CONST_INT, CONST_DOUBLE or CONST_VECTOR whose
   bytes repeat in each 4-byte word) can be generated by one of the
   or-family immediate instructions, excluding the iohl form.
   NOTE(review): local declarations and in-loop `return 0;' lines were
   dropped by this extraction and are reconstructed -- confirm.  */
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  /* A vector with symbolic elements can never be a logical immediate.  */
  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* Expand OP into its 16-byte pattern.  */
  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  /* Reassemble the first word (big-endian) and classify it.  */
  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
/* Return TRUE if OP (a CONST_INT, CONST_DOUBLE or CONST_VECTOR whose
   bytes repeat in each 4-byte word) fits the iohl immediate form, i.e.
   its repeated 32-bit word is an unsigned 16-bit value.
   NOTE(review): local declarations and in-loop `return 0;' lines were
   dropped by this extraction and are reconstructed -- confirm.  */
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  /* A vector with symbolic elements can never match.  */
  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* Expand OP into its 16-byte pattern.  */
  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  /* Reassemble the first word (big-endian); iohl takes a 16-bit
     unsigned immediate ORed into the low halfword.  */
  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
/* Return TRUE if OP is a repeated-element constant whose element value,
   sign-extended in its own mode, lies in [LOW, HIGH] -- i.e. it can be
   used as an arithmetic immediate of that range.
   NOTE(review): local declarations, in-loop `return 0;' lines and the
   `val = arr[0];' seed were dropped by this extraction and are
   reconstructed -- confirm against the repository.  */
int
arith_immediate_p (rtx op, enum machine_mode mode,
		   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  /* A vector with symbolic elements can never match.  */
  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* Expand OP into its 16-byte pattern.  */
  constant_to_array (mode, op, arr);

  /* Work with the element mode of a vector, as an integer mode of the
     same width.  */
  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  /* Reassemble one element, big-endian.  */
  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  /* Sign-extend to the element's width before the range check.  */
  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
/* TRUE when op is an immediate and an exact power of 2, and given that
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
   all entries must be the same.
   NOTE(review): local declarations, in-loop `return 0;' lines, the
   `val = arr[0];' seed, the SFmode `if' wrapper and the trailing
   return were dropped by this extraction and are reconstructed --
   confirm against the repository.  */
int
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
{
  enum machine_mode int_mode;
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  /* A vector with symbolic elements can never match.  */
  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* Expand OP into its 16-byte pattern.  */
  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated: every vector element must be
     identical.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  /* Reassemble one element, big-endian.  */
  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, int_mode);

  /* Currently, we only handle SFmode.  */
  gcc_assert (mode == SFmode);
  if (mode == SFmode)
    {
      /* An IEEE single-precision power of 2 has a positive sign and a
	 zero mantissa; the unbiased exponent is the scale.  */
      int exp = (val >> 23) - 127;
      return val > 0 && (val & 0x007fffff) == 0
	&& exp >= low && exp <= high;
    }
  return 0;
}
3608 - any 32-bit constant (SImode, SFmode)
3609 - any constant that can be generated with fsmbi (any mode)
3610 - a 64-bit constant where the high and low bits are identical
3612 - a 128-bit constant where the four 32-bit words match. */
3614 spu_legitimate_constant_p (rtx x
)
3616 if (GET_CODE (x
) == HIGH
)
3618 /* V4SI with all identical symbols is valid. */
3620 && GET_MODE (x
) == V4SImode
3621 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3622 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3623 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3624 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3625 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3626 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3628 if (GET_CODE (x
) == CONST_VECTOR
3629 && !const_vector_immediate_p (x
))
3634 /* Valid address are:
3635 - symbol_ref, label_ref, const
3637 - reg + const_int, where const_int is 16 byte aligned
3638 - reg + reg, alignment doesn't matter
3639 The alignment matters in the reg+const case because lqd and stqd
3640 ignore the 4 least significant bits of the const. We only care about
3641 16 byte modes because the expand phase will change all smaller MEM
3642 references to TImode. */
3644 spu_legitimate_address_p (enum machine_mode mode
,
3645 rtx x
, bool reg_ok_strict
)
3647 int aligned
= GET_MODE_SIZE (mode
) >= 16;
3649 && GET_CODE (x
) == AND
3650 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3651 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16)
3653 switch (GET_CODE (x
))
3658 return !TARGET_LARGE_MEM
;
3661 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3669 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3674 rtx op0
= XEXP (x
, 0);
3675 rtx op1
= XEXP (x
, 1);
3676 if (GET_CODE (op0
) == SUBREG
)
3677 op0
= XEXP (op0
, 0);
3678 if (GET_CODE (op1
) == SUBREG
)
3679 op1
= XEXP (op1
, 0);
3680 if (GET_CODE (op0
) == REG
3681 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3682 && GET_CODE (op1
) == CONST_INT
3683 && INTVAL (op1
) >= -0x2000
3684 && INTVAL (op1
) <= 0x1fff
3685 && (!aligned
|| (INTVAL (op1
) & 15) == 0))
3687 if (GET_CODE (op0
) == REG
3688 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3689 && GET_CODE (op1
) == REG
3690 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
/* When the address is reg + const_int, force the const_int into a
   register (the PLUS handling below force_regs any non-REG operand).
   NOTE(review): the `op0 = XEXP (x, 0);' / `op1 = XEXP (x, 1);' loads
   and the final `return x;' were dropped by this extraction and are
   reconstructed -- confirm against the repository.  */
rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx op0, op1;
  /* Make sure both operands are registers.  */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      /* A 16-byte-aligned symbol can serve as a base; record the
	 128-bit alignment on the resulting pointer register so later
	 passes can exploit it.  */
      if (ALIGNED_SYMBOL_REF_P (op0))
	{
	  op0 = force_reg (Pmode, op0);
	  mark_reg_pointer (op0, 128);
	}
      else if (GET_CODE (op0) != REG)
	op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
	{
	  op1 = force_reg (Pmode, op1);
	  mark_reg_pointer (op1, 128);
	}
      else if (GET_CODE (op1) != REG)
	op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
    }
  return x;
}
3732 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3733 struct attribute_spec.handler. */
3735 spu_handle_fndecl_attribute (tree
* node
,
3737 tree args ATTRIBUTE_UNUSED
,
3738 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3740 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3742 warning (0, "%qE attribute only applies to functions",
3744 *no_add_attrs
= true;
3750 /* Handle the "vector" attribute. */
3752 spu_handle_vector_attribute (tree
* node
, tree name
,
3753 tree args ATTRIBUTE_UNUSED
,
3754 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3756 tree type
= *node
, result
= NULL_TREE
;
3757 enum machine_mode mode
;
3760 while (POINTER_TYPE_P (type
)
3761 || TREE_CODE (type
) == FUNCTION_TYPE
3762 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3763 type
= TREE_TYPE (type
);
3765 mode
= TYPE_MODE (type
);
3767 unsigned_p
= TYPE_UNSIGNED (type
);
3771 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3774 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3777 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3780 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3783 result
= V4SF_type_node
;
3786 result
= V2DF_type_node
;
3792 /* Propagate qualifiers attached to the element type
3793 onto the vector type. */
3794 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3795 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3797 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3800 warning (0, "%qE attribute ignored", name
);
3802 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
/* Return nonzero if FUNC is a naked function, i.e. it carries the
   "naked" attribute (prologue/epilogue suppressed).
   NOTE(review): the statement executed when FUNC is not a
   FUNCTION_DECL was dropped by this extraction; `abort ();'
   reconstructed -- confirm against the repository.  */
int
spu_naked_function_p (tree func)
{
  tree a;
  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();
  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
/* INITIAL_ELIMINATION_OFFSET worker: distance between register FROM
   and register TO once FROM has been eliminated.  Offsets are built
   from the frame size, outgoing argument area, saved-register area,
   and the ABI back-chain slot (STACK_POINTER_OFFSET).
   NOTE(review): the `int sp_offset = 0;' initialization and the final
   fall-through return were dropped by this extraction and are
   reconstructed -- confirm against the repository.  */
int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  /* Only a leaf function with no frame, no outgoing args and no saved
     registers may omit the back-chain slot.  */
  if (!current_function_is_leaf || crtl->outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return get_frame_size () + crtl->outgoing_args_size + sp_offset;
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size ();
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + crtl->outgoing_args_size
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;
  return 0;
}
3842 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3844 enum machine_mode mode
= TYPE_MODE (type
);
3845 int byte_size
= ((mode
== BLKmode
)
3846 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3848 /* Make sure small structs are left justified in a register. */
3849 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3850 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3852 enum machine_mode smode
;
3855 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3856 int n
= byte_size
/ UNITS_PER_WORD
;
3857 v
= rtvec_alloc (nregs
);
3858 for (i
= 0; i
< n
; i
++)
3860 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3861 gen_rtx_REG (TImode
,
3864 GEN_INT (UNITS_PER_WORD
* i
));
3865 byte_size
-= UNITS_PER_WORD
;
3873 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3875 gen_rtx_EXPR_LIST (VOIDmode
,
3876 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3877 GEN_INT (UNITS_PER_WORD
* n
));
3879 return gen_rtx_PARALLEL (mode
, v
);
3881 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3885 spu_function_arg (CUMULATIVE_ARGS cum
,
3886 enum machine_mode mode
,
3887 tree type
, int named ATTRIBUTE_UNUSED
)
3891 if (cum
>= MAX_REGISTER_ARGS
)
3894 byte_size
= ((mode
== BLKmode
)
3895 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3897 /* The ABI does not allow parameters to be passed partially in
3898 reg and partially in stack. */
3899 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3902 /* Make sure small structs are left justified in a register. */
3903 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3904 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3906 enum machine_mode smode
;
3910 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3911 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3912 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
3914 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3917 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
3920 /* Variable sized types are passed by reference. */
3922 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
3923 enum machine_mode mode ATTRIBUTE_UNUSED
,
3924 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3926 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
          void *__args __attribute__((__aligned(16)));
          void *__skip __attribute__((__aligned(16)));
        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes.
   NOTE(review): the `type_decl =' assignment and the warn_padded
   save/restore lines were dropped by this extraction and are
   reconstructed -- confirm against the repository.  */
static tree
spu_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  type_decl =
    build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);

  /* Both fields are forced to 128-bit (16-byte) alignment.  */
  DECL_FIELD_CONTEXT (f_args) = record;
  DECL_ALIGN (f_args) = 128;
  DECL_USER_ALIGN (f_args) = 1;

  DECL_FIELD_CONTEXT (f_skip) = record;
  DECL_ALIGN (f_skip) = 128;
  DECL_USER_ALIGN (f_skip) = 1;

  /* Wire the record, its name, and its field chain together.  */
  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  TREE_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it too.  It is an internal
     type so hide the warnings from the user.  */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.

   crtl->args.pretend_args_size gives the size of any named args that
   were spilled to the stack by the varargs machinery; it is used below
   to position both the __args and __skip pointers.
   NOTE(review): the local declarations and the `args =' / `skip ='
   left-hand sides were dropped by this extraction and are
   reconstructed -- confirm against the repository.  */
static void
spu_va_start (tree valist, rtx nextarg)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = TREE_CHAIN (f_args);

  /* va_list is an array of one record; step through to the record.  */
  valist = build_va_arg_indirect_ref (valist);

  /* COMPONENT_REFs for the record's two fields.  */
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area.  */
  t = make_tree (TREE_TYPE (args), nextarg);
  if (crtl->args.pretend_args_size > 0)
    t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
		size_int (-STACK_POINTER_OFFSET));
  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area.  */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
	      size_int (crtl->args.pretend_args_size
			- STACK_POINTER_OFFSET));
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
4033 /* Gimplify va_arg by updating the va_list structure
4034 VALIST as required to retrieve an argument of type
4035 TYPE, and returning that argument.
4037 ret = va_arg(VALIST, TYPE);
4039 generates code equivalent to:
4041 paddedsize = (sizeof(TYPE) + 15) & -16;
4042 if (VALIST.__args + paddedsize > VALIST.__skip
4043 && VALIST.__args <= VALIST.__skip)
4044 addr = VALIST.__skip + 32;
4046 addr = VALIST.__args;
4047 VALIST.__args = addr + paddedsize;
4048 ret = *(TYPE *)addr;
4051 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4052 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4054 tree f_args
, f_skip
;
4056 HOST_WIDE_INT size
, rsize
;
4057 tree paddedsize
, addr
, tmp
;
4058 bool pass_by_reference_p
;
4060 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4061 f_skip
= TREE_CHAIN (f_args
);
4063 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4065 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4067 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4069 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4070 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4072 /* if an object is dynamically sized, a pointer to it is passed
4073 instead of the object itself. */
4074 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4076 if (pass_by_reference_p
)
4077 type
= build_pointer_type (type
);
4078 size
= int_size_in_bytes (type
);
4079 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4081 /* build conditional expression to calculate addr. The expression
4082 will be gimplified later. */
4083 paddedsize
= size_int (rsize
);
4084 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (args
), paddedsize
);
4085 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4086 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4087 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4088 unshare_expr (skip
)));
4090 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4091 build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (skip
),
4092 size_int (32)), unshare_expr (args
));
4094 gimplify_assign (addr
, tmp
, pre_p
);
4096 /* update VALIST.__args */
4097 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
4098 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4100 addr
= fold_convert (build_pointer_type (type
), addr
);
4102 if (pass_by_reference_p
)
4103 addr
= build_va_arg_indirect_ref (addr
);
4105 return build_va_arg_indirect_ref (addr
);
4108 /* Save parameter registers starting with the register that corresponds
4109 to the first unnamed parameters. If the first unnamed parameter is
4110 in the stack then save no registers. Set pretend_args_size to the
4111 amount of space needed to save the registers. */
4113 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
4114 tree type
, int *pretend_size
, int no_rtl
)
4123 /* cum currently points to the last named argument, we want to
4124 start at the next argument. */
4125 FUNCTION_ARG_ADVANCE (ncum
, mode
, type
, 1);
4127 offset
= -STACK_POINTER_OFFSET
;
4128 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4130 tmp
= gen_frame_mem (V4SImode
,
4131 plus_constant (virtual_incoming_args_rtx
,
4133 emit_move_insn (tmp
,
4134 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4137 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
4142 spu_conditional_register_usage (void)
4146 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4147 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4151 /* This is called any time we inspect the alignment of a register for
4154 reg_aligned_for_addr (rtx x
)
4157 REGNO (x
) < FIRST_PSEUDO_REGISTER
? ORIGINAL_REGNO (x
) : REGNO (x
);
4158 return REGNO_POINTER_ALIGN (regno
) >= 128;
4161 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4162 into its SYMBOL_REF_FLAGS. */
4164 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
4166 default_encode_section_info (decl
, rtl
, first
);
4168 /* If a variable has a forced alignment to < 16 bytes, mark it with
4169 SYMBOL_FLAG_ALIGN1. */
4170 if (TREE_CODE (decl
) == VAR_DECL
4171 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4172 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4175 /* Return TRUE if we are certain the mem refers to a complete object
4176 which is both 16-byte aligned and padded to a 16-byte boundary. This
4177 would make it safe to store with a single instruction.
4178 We guarantee the alignment and padding for static objects by aligning
4179 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4180 FIXME: We currently cannot guarantee this for objects on the stack
4181 because assign_parm_setup_stack calls assign_stack_local with the
4182 alignment of the parameter mode and in that case the alignment never
4183 gets adjusted by LOCAL_ALIGNMENT. */
4185 store_with_one_insn_p (rtx mem
)
4187 enum machine_mode mode
= GET_MODE (mem
);
4188 rtx addr
= XEXP (mem
, 0);
4189 if (mode
== BLKmode
)
4191 if (GET_MODE_SIZE (mode
) >= 16)
4193 /* Only static objects. */
4194 if (GET_CODE (addr
) == SYMBOL_REF
)
4196 /* We use the associated declaration to make sure the access is
4197 referring to the whole object.
4198 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
4199 if it is necessary. Will there be cases where one exists, and
4200 the other does not? Will there be cases where both exist, but
4201 have different types? */
4202 tree decl
= MEM_EXPR (mem
);
4204 && TREE_CODE (decl
) == VAR_DECL
4205 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4207 decl
= SYMBOL_REF_DECL (addr
);
4209 && TREE_CODE (decl
) == VAR_DECL
4210 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4216 /* Return 1 when the address is not valid for a simple load and store as
4217 required by the '_mov*' patterns. We could make this less strict
4218 for loads, but we prefer mem's to look the same so they are more
4219 likely to be merged. */
4221 address_needs_split (rtx mem
)
4223 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4224 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4225 || !(store_with_one_insn_p (mem
)
4226 || mem_is_padded_component_ref (mem
))))
4233 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
4235 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4238 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4240 rtx from
= SUBREG_REG (ops
[1]);
4241 enum machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4243 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4244 && GET_MODE_CLASS (imode
) == MODE_INT
4245 && subreg_lowpart_p (ops
[1]));
4247 if (GET_MODE_SIZE (imode
) < 4)
4249 if (imode
!= GET_MODE (from
))
4250 from
= gen_rtx_SUBREG (imode
, from
, 0);
4252 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4254 enum insn_code icode
= convert_optab_handler (trunc_optab
, mode
, imode
)->insn_code
;
4255 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4258 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4262 /* At least one of the operands needs to be a register. */
4263 if ((reload_in_progress
| reload_completed
) == 0
4264 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4266 rtx temp
= force_reg (mode
, ops
[1]);
4267 emit_move_insn (ops
[0], temp
);
4270 if (reload_in_progress
|| reload_completed
)
4272 if (CONSTANT_P (ops
[1]))
4273 return spu_split_immediate (ops
);
4277 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4279 if (GET_CODE (ops
[1]) == CONST_INT
)
4281 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4282 if (val
!= INTVAL (ops
[1]))
4284 emit_move_insn (ops
[0], GEN_INT (val
));
4289 return spu_split_store (ops
);
4291 return spu_split_load (ops
);
4297 spu_convert_move (rtx dst
, rtx src
)
4299 enum machine_mode mode
= GET_MODE (dst
);
4300 enum machine_mode int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
4302 gcc_assert (GET_MODE (src
) == TImode
);
4303 reg
= int_mode
!= mode
? gen_reg_rtx (int_mode
) : dst
;
4304 emit_insn (gen_rtx_SET (VOIDmode
, reg
,
4305 gen_rtx_TRUNCATE (int_mode
,
4306 gen_rtx_LSHIFTRT (TImode
, src
,
4307 GEN_INT (int_mode
== DImode
? 64 : 96)))));
4308 if (int_mode
!= mode
)
4310 reg
= simplify_gen_subreg (mode
, reg
, int_mode
, 0);
4311 emit_move_insn (dst
, reg
);
4315 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4316 the address from SRC and SRC+16. Return a REG or CONST_INT that
4317 specifies how many bytes to rotate the loaded registers, plus any
4318 extra from EXTRA_ROTQBY. The address and rotate amounts are
4319 normalized to improve merging of loads and rotate computations. */
4321 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4323 rtx addr
= XEXP (src
, 0);
4324 rtx p0
, p1
, rot
, addr0
, addr1
;
4330 if (MEM_ALIGN (src
) >= 128)
4331 /* Address is already aligned; simply perform a TImode load. */ ;
4332 else if (GET_CODE (addr
) == PLUS
)
4335 aligned reg + aligned reg => lqx
4336 aligned reg + unaligned reg => lqx, rotqby
4337 aligned reg + aligned const => lqd
4338 aligned reg + unaligned const => lqd, rotqbyi
4339 unaligned reg + aligned reg => lqx, rotqby
4340 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4341 unaligned reg + aligned const => lqd, rotqby
4342 unaligned reg + unaligned const -> not allowed by legitimate address
4344 p0
= XEXP (addr
, 0);
4345 p1
= XEXP (addr
, 1);
4346 if (!reg_aligned_for_addr (p0
))
4348 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4350 rot
= gen_reg_rtx (SImode
);
4351 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4353 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4357 && INTVAL (p1
) * BITS_PER_UNIT
4358 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4360 rot
= gen_reg_rtx (SImode
);
4361 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4366 rtx x
= gen_reg_rtx (SImode
);
4367 emit_move_insn (x
, p1
);
4368 if (!spu_arith_operand (p1
, SImode
))
4370 rot
= gen_reg_rtx (SImode
);
4371 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4372 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4380 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4382 rot_amt
= INTVAL (p1
) & 15;
4383 if (INTVAL (p1
) & -16)
4385 p1
= GEN_INT (INTVAL (p1
) & -16);
4386 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4391 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4395 else if (REG_P (addr
))
4397 if (!reg_aligned_for_addr (addr
))
4400 else if (GET_CODE (addr
) == CONST
)
4402 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4403 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4404 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4406 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4408 addr
= gen_rtx_CONST (Pmode
,
4409 gen_rtx_PLUS (Pmode
,
4410 XEXP (XEXP (addr
, 0), 0),
4411 GEN_INT (rot_amt
& -16)));
4413 addr
= XEXP (XEXP (addr
, 0), 0);
4417 rot
= gen_reg_rtx (Pmode
);
4418 emit_move_insn (rot
, addr
);
4421 else if (GET_CODE (addr
) == CONST_INT
)
4423 rot_amt
= INTVAL (addr
);
4424 addr
= GEN_INT (rot_amt
& -16);
4426 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4428 rot
= gen_reg_rtx (Pmode
);
4429 emit_move_insn (rot
, addr
);
4432 rot_amt
+= extra_rotby
;
4438 rtx x
= gen_reg_rtx (SImode
);
4439 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4443 if (!rot
&& rot_amt
)
4444 rot
= GEN_INT (rot_amt
);
4446 addr0
= copy_rtx (addr
);
4447 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4448 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4452 addr1
= plus_constant (copy_rtx (addr
), 16);
4453 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4454 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
4461 spu_split_load (rtx
* ops
)
4463 enum machine_mode mode
= GET_MODE (ops
[0]);
4464 rtx addr
, load
, rot
;
4467 if (GET_MODE_SIZE (mode
) >= 16)
4470 addr
= XEXP (ops
[1], 0);
4471 gcc_assert (GET_CODE (addr
) != AND
);
4473 if (!address_needs_split (ops
[1]))
4475 ops
[1] = change_address (ops
[1], TImode
, addr
);
4476 load
= gen_reg_rtx (TImode
);
4477 emit_insn (gen__movti (load
, ops
[1]));
4478 spu_convert_move (ops
[0], load
);
4482 rot_amt
= GET_MODE_SIZE (mode
) < 4 ? GET_MODE_SIZE (mode
) - 4 : 0;
4484 load
= gen_reg_rtx (TImode
);
4485 rot
= spu_expand_load (load
, 0, ops
[1], rot_amt
);
4488 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4490 spu_convert_move (ops
[0], load
);
4495 spu_split_store (rtx
* ops
)
4497 enum machine_mode mode
= GET_MODE (ops
[0]);
4499 rtx addr
, p0
, p1
, p1_lo
, smem
;
4503 if (GET_MODE_SIZE (mode
) >= 16)
4506 addr
= XEXP (ops
[0], 0);
4507 gcc_assert (GET_CODE (addr
) != AND
);
4509 if (!address_needs_split (ops
[0]))
4511 reg
= gen_reg_rtx (TImode
);
4512 emit_insn (gen_spu_convert (reg
, ops
[1]));
4513 ops
[0] = change_address (ops
[0], TImode
, addr
);
4514 emit_move_insn (ops
[0], reg
);
4518 if (GET_CODE (addr
) == PLUS
)
4521 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4522 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4523 aligned reg + aligned const => lqd, c?d, shuf, stqx
4524 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4525 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4526 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4527 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4528 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4531 p0
= XEXP (addr
, 0);
4532 p1
= p1_lo
= XEXP (addr
, 1);
4533 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4535 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4536 if (reg_aligned_for_addr (p0
))
4538 p1
= GEN_INT (INTVAL (p1
) & -16);
4539 if (p1
== const0_rtx
)
4542 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4546 rtx x
= gen_reg_rtx (SImode
);
4547 emit_move_insn (x
, p1
);
4548 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4552 else if (REG_P (addr
))
4556 p1
= p1_lo
= const0_rtx
;
4561 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4562 p1
= 0; /* aform doesn't use p1 */
4564 if (ALIGNED_SYMBOL_REF_P (addr
))
4566 else if (GET_CODE (addr
) == CONST
4567 && GET_CODE (XEXP (addr
, 0)) == PLUS
4568 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4569 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4571 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4573 addr
= gen_rtx_CONST (Pmode
,
4574 gen_rtx_PLUS (Pmode
,
4575 XEXP (XEXP (addr
, 0), 0),
4576 GEN_INT (v
& -16)));
4578 addr
= XEXP (XEXP (addr
, 0), 0);
4579 p1_lo
= GEN_INT (v
& 15);
4581 else if (GET_CODE (addr
) == CONST_INT
)
4583 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4584 addr
= GEN_INT (INTVAL (addr
) & -16);
4588 p1_lo
= gen_reg_rtx (SImode
);
4589 emit_move_insn (p1_lo
, addr
);
4593 reg
= gen_reg_rtx (TImode
);
4595 scalar
= store_with_one_insn_p (ops
[0]);
4598 /* We could copy the flags from the ops[0] MEM to mem here,
4599 We don't because we want this load to be optimized away if
4600 possible, and copying the flags will prevent that in certain
4601 cases, e.g. consider the volatile flag. */
4603 rtx pat
= gen_reg_rtx (TImode
);
4604 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4605 set_mem_alias_set (lmem
, 0);
4606 emit_insn (gen_movti (reg
, lmem
));
4608 if (!p0
|| reg_aligned_for_addr (p0
))
4609 p0
= stack_pointer_rtx
;
4613 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4614 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4618 if (GET_CODE (ops
[1]) == REG
)
4619 emit_insn (gen_spu_convert (reg
, ops
[1]));
4620 else if (GET_CODE (ops
[1]) == SUBREG
)
4621 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
4626 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4627 emit_insn (gen_ashlti3
4628 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
4630 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4631 /* We can't use the previous alias set because the memory has changed
4632 size and can potentially overlap objects of other types. */
4633 set_mem_alias_set (smem
, 0);
4635 emit_insn (gen_movti (smem
, reg
));
4639 /* Return TRUE if X is MEM which is a struct member reference
4640 and the member can safely be loaded and stored with a single
4641 instruction because it is padded. */
4643 mem_is_padded_component_ref (rtx x
)
4645 tree t
= MEM_EXPR (x
);
4647 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
4649 t
= TREE_OPERAND (t
, 1);
4650 if (!t
|| TREE_CODE (t
) != FIELD_DECL
4651 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
4653 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4654 r
= DECL_FIELD_CONTEXT (t
);
4655 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
4657 /* Make sure they are the same mode */
4658 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
4660 /* If there are no following fields then the field alignment assures
4661 the structure is padded to the alignment which means this field is
4663 if (TREE_CHAIN (t
) == 0)
4665 /* If the following field is also aligned then this field will be
4668 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
4673 /* Parse the -mfixed-range= option string. */
4675 fix_range (const char *const_str
)
4678 char *str
, *dash
, *comma
;
4680 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4681 REG2 are either register names or register numbers. The effect
4682 of this option is to mark the registers in the range from REG1 to
4683 REG2 as ``fixed'' so they won't be used by the compiler. */
4685 i
= strlen (const_str
);
4686 str
= (char *) alloca (i
+ 1);
4687 memcpy (str
, const_str
, i
+ 1);
4691 dash
= strchr (str
, '-');
4694 warning (0, "value of -mfixed-range must have form REG1-REG2");
4698 comma
= strchr (dash
+ 1, ',');
4702 first
= decode_reg_name (str
);
4705 warning (0, "unknown register name: %s", str
);
4709 last
= decode_reg_name (dash
+ 1);
4712 warning (0, "unknown register name: %s", dash
+ 1);
4720 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
4724 for (i
= first
; i
<= last
; ++i
)
4725 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4735 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4736 can be generated using the fsmbi instruction. */
4738 fsmbi_const_p (rtx x
)
4742 /* We can always choose TImode for CONST_INT because the high bits
4743 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4744 enum immediate_class c
= classify_immediate (x
, TImode
);
4745 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
4750 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4751 can be generated using the cbd, chd, cwd or cdd instruction. */
4753 cpat_const_p (rtx x
, enum machine_mode mode
)
4757 enum immediate_class c
= classify_immediate (x
, mode
);
4758 return c
== IC_CPAT
;
4764 gen_cpat_const (rtx
* ops
)
4766 unsigned char dst
[16];
4767 int i
, offset
, shift
, isize
;
4768 if (GET_CODE (ops
[3]) != CONST_INT
4769 || GET_CODE (ops
[2]) != CONST_INT
4770 || (GET_CODE (ops
[1]) != CONST_INT
4771 && GET_CODE (ops
[1]) != REG
))
4773 if (GET_CODE (ops
[1]) == REG
4774 && (!REG_POINTER (ops
[1])
4775 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
4778 for (i
= 0; i
< 16; i
++)
4780 isize
= INTVAL (ops
[3]);
4783 else if (isize
== 2)
4787 offset
= (INTVAL (ops
[2]) +
4788 (GET_CODE (ops
[1]) ==
4789 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
4790 for (i
= 0; i
< isize
; i
++)
4791 dst
[offset
+ i
] = i
+ shift
;
4792 return array_to_constant (TImode
, dst
);
4795 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4796 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4797 than 16 bytes, the value is repeated across the rest of the array. */
4799 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
4804 memset (arr
, 0, 16);
4805 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
4806 if (GET_CODE (x
) == CONST_INT
4807 || (GET_CODE (x
) == CONST_DOUBLE
4808 && (mode
== SFmode
|| mode
== DFmode
)))
4810 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
4812 if (GET_CODE (x
) == CONST_DOUBLE
)
4813 val
= const_double_to_hwint (x
);
4816 first
= GET_MODE_SIZE (mode
) - 1;
4817 for (i
= first
; i
>= 0; i
--)
4819 arr
[i
] = val
& 0xff;
4822 /* Splat the constant across the whole array. */
4823 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
4826 j
= (j
== first
) ? 0 : j
+ 1;
4829 else if (GET_CODE (x
) == CONST_DOUBLE
)
4831 val
= CONST_DOUBLE_LOW (x
);
4832 for (i
= 15; i
>= 8; i
--)
4834 arr
[i
] = val
& 0xff;
4837 val
= CONST_DOUBLE_HIGH (x
);
4838 for (i
= 7; i
>= 0; i
--)
4840 arr
[i
] = val
& 0xff;
4844 else if (GET_CODE (x
) == CONST_VECTOR
)
4848 mode
= GET_MODE_INNER (mode
);
4849 units
= CONST_VECTOR_NUNITS (x
);
4850 for (i
= 0; i
< units
; i
++)
4852 elt
= CONST_VECTOR_ELT (x
, i
);
4853 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
4855 if (GET_CODE (elt
) == CONST_DOUBLE
)
4856 val
= const_double_to_hwint (elt
);
4859 first
= GET_MODE_SIZE (mode
) - 1;
4860 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
4862 for (j
= first
; j
>= 0; j
--)
4864 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
4874 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4875 smaller than 16 bytes, use the bytes that would represent that value
4876 in a register, e.g., for QImode return the value of arr[3]. */
4878 array_to_constant (enum machine_mode mode
, unsigned char arr
[16])
4880 enum machine_mode inner_mode
;
4882 int units
, size
, i
, j
, k
;
4885 if (GET_MODE_CLASS (mode
) == MODE_INT
4886 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
4888 j
= GET_MODE_SIZE (mode
);
4889 i
= j
< 4 ? 4 - j
: 0;
4890 for (val
= 0; i
< j
; i
++)
4891 val
= (val
<< 8) | arr
[i
];
4892 val
= trunc_int_for_mode (val
, mode
);
4893 return GEN_INT (val
);
4899 for (i
= high
= 0; i
< 8; i
++)
4900 high
= (high
<< 8) | arr
[i
];
4901 for (i
= 8, val
= 0; i
< 16; i
++)
4902 val
= (val
<< 8) | arr
[i
];
4903 return immed_double_const (val
, high
, TImode
);
4907 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
4908 val
= trunc_int_for_mode (val
, SImode
);
4909 return hwint_to_const_double (SFmode
, val
);
4913 for (i
= 0, val
= 0; i
< 8; i
++)
4914 val
= (val
<< 8) | arr
[i
];
4915 return hwint_to_const_double (DFmode
, val
);
4918 if (!VECTOR_MODE_P (mode
))
4921 units
= GET_MODE_NUNITS (mode
);
4922 size
= GET_MODE_UNIT_SIZE (mode
);
4923 inner_mode
= GET_MODE_INNER (mode
);
4924 v
= rtvec_alloc (units
);
4926 for (k
= i
= 0; i
< units
; ++i
)
4929 for (j
= 0; j
< size
; j
++, k
++)
4930 val
= (val
<< 8) | arr
[k
];
4932 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
4933 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
4935 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
4940 return gen_rtx_CONST_VECTOR (mode
, v
);
4944 reloc_diagnostic (rtx x
)
4946 tree loc_decl
, decl
= 0;
4948 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
4951 if (GET_CODE (x
) == SYMBOL_REF
)
4952 decl
= SYMBOL_REF_DECL (x
);
4953 else if (GET_CODE (x
) == CONST
4954 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
4955 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
4957 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4958 if (decl
&& !DECL_P (decl
))
4961 /* We use last_assemble_variable_decl to get line information. It's
4962 not always going to be right and might not even be close, but will
4963 be right for the more common cases. */
4964 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
4967 loc_decl
= last_assemble_variable_decl
;
4969 /* The decl could be a string constant. */
4970 if (decl
&& DECL_P (decl
))
4971 msg
= "%Jcreating run-time relocation for %qD";
4973 msg
= "creating run-time relocation";
4975 if (TARGET_WARN_RELOC
)
4976 warning (0, msg
, loc_decl
, decl
);
4978 error (msg
, loc_decl
, decl
);
4981 /* Hook into assemble_integer so we can generate an error for run-time
4982 relocations. The SPU ABI disallows them. */
4984 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
4986 /* By default run-time relocations aren't supported, but we allow them
4987 in case users support it in their own run-time loader. And we provide
4988 a warning for those users that don't. */
4989 if ((GET_CODE (x
) == SYMBOL_REF
)
4990 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
4991 reloc_diagnostic (x
);
4993 return default_assemble_integer (x
, size
, aligned_p
);
/* Implement TARGET_ASM_GLOBALIZE_LABEL: emit a ".global NAME"
   directive to FILE.  */
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
5005 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
,
5006 bool speed ATTRIBUTE_UNUSED
)
5008 enum machine_mode mode
= GET_MODE (x
);
5009 int cost
= COSTS_N_INSNS (2);
5011 /* Folding to a CONST_VECTOR will use extra space but there might
5012 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5013 only if it allows us to fold away multiple insns. Changing the cost
5014 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5015 because this cost will only be compared against a single insn.
5016 if (code == CONST_VECTOR)
5017 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5020 /* Use defaults for float operations. Not accurate but good enough. */
5023 *total
= COSTS_N_INSNS (13);
5028 *total
= COSTS_N_INSNS (6);
5034 if (satisfies_constraint_K (x
))
5036 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5037 *total
= COSTS_N_INSNS (1);
5039 *total
= COSTS_N_INSNS (3);
5043 *total
= COSTS_N_INSNS (3);
5048 *total
= COSTS_N_INSNS (0);
5052 *total
= COSTS_N_INSNS (5);
5056 case FLOAT_TRUNCATE
:
5058 case UNSIGNED_FLOAT
:
5061 *total
= COSTS_N_INSNS (7);
5067 *total
= COSTS_N_INSNS (9);
5074 GET_CODE (XEXP (x
, 0)) ==
5075 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5076 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5078 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5080 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5081 cost
= COSTS_N_INSNS (14);
5082 if ((val
& 0xffff) == 0)
5083 cost
= COSTS_N_INSNS (9);
5084 else if (val
> 0 && val
< 0x10000)
5085 cost
= COSTS_N_INSNS (11);
5094 *total
= COSTS_N_INSNS (20);
5101 *total
= COSTS_N_INSNS (4);
5104 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5105 *total
= COSTS_N_INSNS (0);
5107 *total
= COSTS_N_INSNS (4);
5110 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5111 if (GET_MODE_CLASS (mode
) == MODE_INT
5112 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5113 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5114 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5119 static enum machine_mode
5120 spu_unwind_word_mode (void)
5125 /* Decide whether we can make a sibling call to a function. DECL is the
5126 declaration of the function being targeted by the call and EXP is the
5127 CALL_EXPR representing the call. */
5129 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5131 return decl
&& !TARGET_LARGE_MEM
;
5134 /* We need to correctly update the back chain pointer and the Available
5135 Stack Size (which is in the second slot of the sp register.) */
5137 spu_allocate_stack (rtx op0
, rtx op1
)
5140 rtx chain
= gen_reg_rtx (V4SImode
);
5141 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5142 rtx sp
= gen_reg_rtx (V4SImode
);
5143 rtx splatted
= gen_reg_rtx (V4SImode
);
5144 rtx pat
= gen_reg_rtx (TImode
);
5146 /* copy the back chain so we can save it back again. */
5147 emit_move_insn (chain
, stack_bot
);
5149 op1
= force_reg (SImode
, op1
);
5151 v
= 0x1020300010203ll
;
5152 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5153 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5155 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5156 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
5158 if (flag_stack_check
)
5160 rtx avail
= gen_reg_rtx(SImode
);
5161 rtx result
= gen_reg_rtx(SImode
);
5162 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
5163 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5164 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5167 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
5169 emit_move_insn (stack_bot
, chain
);
5171 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
5175 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5177 static unsigned char arr
[16] =
5178 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5179 rtx temp
= gen_reg_rtx (SImode
);
5180 rtx temp2
= gen_reg_rtx (SImode
);
5181 rtx temp3
= gen_reg_rtx (V4SImode
);
5182 rtx temp4
= gen_reg_rtx (V4SImode
);
5183 rtx pat
= gen_reg_rtx (TImode
);
5184 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5186 /* Restore the backchain from the first word, sp from the second. */
5187 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5188 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5190 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5192 /* Compute Available Stack Size for sp */
5193 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5194 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5196 /* Compute Available Stack Size for back chain */
5197 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5198 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5199 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
5201 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5202 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
5206 spu_init_libfuncs (void)
5208 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
5209 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
5210 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
5211 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
5212 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
5213 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
5214 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
5215 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
5216 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
5217 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
5218 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
5220 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
5221 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
5223 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
5224 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
5225 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
5226 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
5227 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
5228 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
5231 /* Make a subreg, stripping any existing subreg. We could possibly just
5232 call simplify_subreg, but in this case we know what we want. */
5234 spu_gen_subreg (enum machine_mode mode
, rtx x
)
5236 if (GET_CODE (x
) == SUBREG
)
5238 if (GET_MODE (x
) == mode
)
5240 return gen_rtx_SUBREG (mode
, x
, 0);
5244 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5246 return (TYPE_MODE (type
) == BLKmode
5248 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5249 || int_size_in_bytes (type
) >
5250 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
5253 /* Create the built-in types and functions */
5255 enum spu_function_code
5257 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5258 #include "spu-builtins.def"
5263 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5265 struct spu_builtin_description spu_builtins
[] = {
5266 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5267 {fcode, icode, name, type, params, NULL_TREE},
5268 #include "spu-builtins.def"
5273 spu_init_builtins (void)
5275 struct spu_builtin_description
*d
;
5278 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5279 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5280 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5281 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5282 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5283 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5285 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5286 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5287 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5288 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5290 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5292 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5293 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5294 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5295 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5296 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5297 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5298 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5299 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5300 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5301 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5302 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5303 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5305 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5306 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5307 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5308 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5309 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5310 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5311 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5312 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5314 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5315 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5317 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5319 spu_builtin_types
[SPU_BTI_PTR
] =
5320 build_pointer_type (build_qualified_type
5322 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5324 /* For each builtin we build a new prototype. The tree code will make
5325 sure nodes are shared. */
5326 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5329 char name
[64]; /* build_function will make a copy. */
5335 /* Find last parm. */
5336 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5341 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5343 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5345 sprintf (name
, "__builtin_%s", d
->name
);
5347 add_builtin_function (name
, p
, END_BUILTINS
+ i
, BUILT_IN_MD
,
5349 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5350 TREE_READONLY (d
->fndecl
) = 1;
5352 /* These builtins don't throw. */
5353 TREE_NOTHROW (d
->fndecl
) = 1;
5358 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5360 static unsigned char arr
[16] =
5361 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5363 rtx temp
= gen_reg_rtx (Pmode
);
5364 rtx temp2
= gen_reg_rtx (V4SImode
);
5365 rtx temp3
= gen_reg_rtx (V4SImode
);
5366 rtx pat
= gen_reg_rtx (TImode
);
5367 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5369 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5371 /* Restore the sp. */
5372 emit_move_insn (temp
, op1
);
5373 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5375 /* Compute available stack size for sp. */
5376 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5377 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5379 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5380 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
5384 spu_safe_dma (HOST_WIDE_INT channel
)
5386 return TARGET_SAFE_DMA
&& channel
>= 21 && channel
<= 27;
5390 spu_builtin_splats (rtx ops
[])
5392 enum machine_mode mode
= GET_MODE (ops
[0]);
5393 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5395 unsigned char arr
[16];
5396 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5397 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5401 rtx reg
= gen_reg_rtx (TImode
);
5403 if (GET_CODE (ops
[1]) != REG
5404 && GET_CODE (ops
[1]) != SUBREG
)
5405 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5411 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5417 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5422 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5427 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5433 emit_move_insn (reg
, shuf
);
5434 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
5439 spu_builtin_extract (rtx ops
[])
5441 enum machine_mode mode
;
5444 mode
= GET_MODE (ops
[1]);
5446 if (GET_CODE (ops
[2]) == CONST_INT
)
5451 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5454 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5457 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5460 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5463 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5466 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5474 from
= spu_gen_subreg (TImode
, ops
[1]);
5475 rot
= gen_reg_rtx (TImode
);
5476 tmp
= gen_reg_rtx (SImode
);
5481 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5484 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5485 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5489 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5493 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5498 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5500 emit_insn (gen_spu_convert (ops
[0], rot
));
5504 spu_builtin_insert (rtx ops
[])
5506 enum machine_mode mode
= GET_MODE (ops
[0]);
5507 enum machine_mode imode
= GET_MODE_INNER (mode
);
5508 rtx mask
= gen_reg_rtx (TImode
);
5511 if (GET_CODE (ops
[3]) == CONST_INT
)
5512 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5515 offset
= gen_reg_rtx (SImode
);
5516 emit_insn (gen_mulsi3
5517 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5520 (mask
, stack_pointer_rtx
, offset
,
5521 GEN_INT (GET_MODE_SIZE (imode
))));
5522 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5526 spu_builtin_promote (rtx ops
[])
5528 enum machine_mode mode
, imode
;
5529 rtx rot
, from
, offset
;
5532 mode
= GET_MODE (ops
[0]);
5533 imode
= GET_MODE_INNER (mode
);
5535 from
= gen_reg_rtx (TImode
);
5536 rot
= spu_gen_subreg (TImode
, ops
[0]);
5538 emit_insn (gen_spu_convert (from
, ops
[1]));
5540 if (GET_CODE (ops
[2]) == CONST_INT
)
5542 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5543 if (GET_MODE_SIZE (imode
) < 4)
5544 pos
+= 4 - GET_MODE_SIZE (imode
);
5545 offset
= GEN_INT (pos
& 15);
5549 offset
= gen_reg_rtx (SImode
);
5553 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5556 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5557 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5561 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5562 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5566 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5572 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5576 spu_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
5578 rtx shuf
= gen_reg_rtx (V4SImode
);
5579 rtx insn
= gen_reg_rtx (V4SImode
);
5584 fnaddr
= force_reg (SImode
, fnaddr
);
5585 cxt
= force_reg (SImode
, cxt
);
5587 if (TARGET_LARGE_MEM
)
5589 rtx rotl
= gen_reg_rtx (V4SImode
);
5590 rtx mask
= gen_reg_rtx (V4SImode
);
5591 rtx bi
= gen_reg_rtx (SImode
);
5592 unsigned char shufa
[16] = {
5593 2, 3, 0, 1, 18, 19, 16, 17,
5594 0, 1, 2, 3, 16, 17, 18, 19
5596 unsigned char insna
[16] = {
5598 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5600 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5603 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5604 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5606 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5607 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5608 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5609 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5611 mem
= memory_address (Pmode
, tramp
);
5612 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
5614 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5615 mem
= memory_address (Pmode
, plus_constant (tramp
, 16));
5616 emit_move_insn (gen_rtx_MEM (Pmode
, mem
), bi
);
5620 rtx scxt
= gen_reg_rtx (SImode
);
5621 rtx sfnaddr
= gen_reg_rtx (SImode
);
5622 unsigned char insna
[16] = {
5623 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5629 shufc
= gen_reg_rtx (TImode
);
5630 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5632 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5633 fits 18 bits and the last 4 are zeros. This will be true if
5634 the stack pointer is initialized to 0x3fff0 at program start,
5635 otherwise the ila instruction will be garbage. */
5637 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5638 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5640 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5641 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5642 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5644 mem
= memory_address (Pmode
, tramp
);
5645 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
5648 emit_insn (gen_sync ());
5652 spu_expand_sign_extend (rtx ops
[])
5654 unsigned char arr
[16];
5655 rtx pat
= gen_reg_rtx (TImode
);
5658 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5659 if (GET_MODE (ops
[1]) == QImode
)
5661 sign
= gen_reg_rtx (HImode
);
5662 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5663 for (i
= 0; i
< 16; i
++)
5669 for (i
= 0; i
< 16; i
++)
5671 switch (GET_MODE (ops
[1]))
5674 sign
= gen_reg_rtx (SImode
);
5675 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5677 arr
[last
- 1] = 0x02;
5680 sign
= gen_reg_rtx (SImode
);
5681 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5682 for (i
= 0; i
< 4; i
++)
5683 arr
[last
- i
] = 3 - i
;
5686 sign
= gen_reg_rtx (SImode
);
5687 c
= gen_reg_rtx (SImode
);
5688 emit_insn (gen_spu_convert (c
, ops
[1]));
5689 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5690 for (i
= 0; i
< 8; i
++)
5691 arr
[last
- i
] = 7 - i
;
5697 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5698 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
5701 /* expand vector initialization. If there are any constant parts,
5702 load constant parts first. Then load any non-constant parts. */
5704 spu_expand_vector_init (rtx target
, rtx vals
)
5706 enum machine_mode mode
= GET_MODE (target
);
5707 int n_elts
= GET_MODE_NUNITS (mode
);
5709 bool all_same
= true;
5710 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5713 first
= XVECEXP (vals
, 0, 0);
5714 for (i
= 0; i
< n_elts
; ++i
)
5716 x
= XVECEXP (vals
, 0, i
);
5717 if (!(CONST_INT_P (x
)
5718 || GET_CODE (x
) == CONST_DOUBLE
5719 || GET_CODE (x
) == CONST_FIXED
))
5723 if (first_constant
== NULL_RTX
)
5726 if (i
> 0 && !rtx_equal_p (x
, first
))
5730 /* if all elements are the same, use splats to repeat elements */
5733 if (!CONSTANT_P (first
)
5734 && !register_operand (first
, GET_MODE (x
)))
5735 first
= force_reg (GET_MODE (first
), first
);
5736 emit_insn (gen_spu_splats (target
, first
));
5740 /* load constant parts */
5741 if (n_var
!= n_elts
)
5745 emit_move_insn (target
,
5746 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
5750 rtx constant_parts_rtx
= copy_rtx (vals
);
5752 gcc_assert (first_constant
!= NULL_RTX
);
5753 /* fill empty slots with the first constant, this increases
5754 our chance of using splats in the recursive call below. */
5755 for (i
= 0; i
< n_elts
; ++i
)
5757 x
= XVECEXP (constant_parts_rtx
, 0, i
);
5758 if (!(CONST_INT_P (x
)
5759 || GET_CODE (x
) == CONST_DOUBLE
5760 || GET_CODE (x
) == CONST_FIXED
))
5761 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
5764 spu_expand_vector_init (target
, constant_parts_rtx
);
5768 /* load variable parts */
5771 rtx insert_operands
[4];
5773 insert_operands
[0] = target
;
5774 insert_operands
[2] = target
;
5775 for (i
= 0; i
< n_elts
; ++i
)
5777 x
= XVECEXP (vals
, 0, i
);
5778 if (!(CONST_INT_P (x
)
5779 || GET_CODE (x
) == CONST_DOUBLE
5780 || GET_CODE (x
) == CONST_FIXED
))
5782 if (!register_operand (x
, GET_MODE (x
)))
5783 x
= force_reg (GET_MODE (x
), x
);
5784 insert_operands
[1] = x
;
5785 insert_operands
[3] = GEN_INT (i
);
5786 spu_builtin_insert (insert_operands
);
5792 /* Return insn index for the vector compare instruction for given CODE,
5793 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
5796 get_vec_cmp_insn (enum rtx_code code
,
5797 enum machine_mode dest_mode
,
5798 enum machine_mode op_mode
)
5804 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5805 return CODE_FOR_ceq_v16qi
;
5806 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5807 return CODE_FOR_ceq_v8hi
;
5808 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5809 return CODE_FOR_ceq_v4si
;
5810 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5811 return CODE_FOR_ceq_v4sf
;
5812 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5813 return CODE_FOR_ceq_v2df
;
5816 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5817 return CODE_FOR_cgt_v16qi
;
5818 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5819 return CODE_FOR_cgt_v8hi
;
5820 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5821 return CODE_FOR_cgt_v4si
;
5822 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5823 return CODE_FOR_cgt_v4sf
;
5824 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5825 return CODE_FOR_cgt_v2df
;
5828 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5829 return CODE_FOR_clgt_v16qi
;
5830 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5831 return CODE_FOR_clgt_v8hi
;
5832 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5833 return CODE_FOR_clgt_v4si
;
5841 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5842 DMODE is expected destination mode. This is a recursive function. */
5845 spu_emit_vector_compare (enum rtx_code rcode
,
5847 enum machine_mode dmode
)
5851 enum machine_mode dest_mode
;
5852 enum machine_mode op_mode
= GET_MODE (op1
);
5854 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
5856 /* Floating point vector compare instructions uses destination V4SImode.
5857 Double floating point vector compare instructions uses destination V2DImode.
5858 Move destination to appropriate mode later. */
5859 if (dmode
== V4SFmode
)
5860 dest_mode
= V4SImode
;
5861 else if (dmode
== V2DFmode
)
5862 dest_mode
= V2DImode
;
5866 mask
= gen_reg_rtx (dest_mode
);
5867 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5869 if (vec_cmp_insn
== -1)
5871 bool swap_operands
= false;
5872 bool try_again
= false;
5877 swap_operands
= true;
5882 swap_operands
= true;
5886 /* Treat A != B as ~(A==B). */
5888 enum insn_code nor_code
;
5889 rtx eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5890 nor_code
= optab_handler (one_cmpl_optab
, (int)dest_mode
)->insn_code
;
5891 gcc_assert (nor_code
!= CODE_FOR_nothing
);
5892 emit_insn (GEN_FCN (nor_code
) (mask
, eq_rtx
));
5893 if (dmode
!= dest_mode
)
5895 rtx temp
= gen_reg_rtx (dest_mode
);
5896 convert_move (temp
, mask
, 0);
5906 /* Try GT/GTU/LT/LTU OR EQ */
5909 enum insn_code ior_code
;
5910 enum rtx_code new_code
;
5914 case GE
: new_code
= GT
; break;
5915 case GEU
: new_code
= GTU
; break;
5916 case LE
: new_code
= LT
; break;
5917 case LEU
: new_code
= LTU
; break;
5922 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
5923 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5925 ior_code
= optab_handler (ior_optab
, (int)dest_mode
)->insn_code
;
5926 gcc_assert (ior_code
!= CODE_FOR_nothing
);
5927 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
5928 if (dmode
!= dest_mode
)
5930 rtx temp
= gen_reg_rtx (dest_mode
);
5931 convert_move (temp
, mask
, 0);
5941 /* You only get two chances. */
5943 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5945 gcc_assert (vec_cmp_insn
!= -1);
5956 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
5957 if (dmode
!= dest_mode
)
5959 rtx temp
= gen_reg_rtx (dest_mode
);
5960 convert_move (temp
, mask
, 0);
5967 /* Emit vector conditional expression.
5968 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5969 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5972 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
5973 rtx cond
, rtx cc_op0
, rtx cc_op1
)
5975 enum machine_mode dest_mode
= GET_MODE (dest
);
5976 enum rtx_code rcode
= GET_CODE (cond
);
5979 /* Get the vector mask for the given relational operations. */
5980 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
5982 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
5988 spu_force_reg (enum machine_mode mode
, rtx op
)
5991 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
5993 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
5994 || GET_MODE (op
) == BLKmode
)
5995 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
5999 r
= force_reg (GET_MODE (op
), op
);
6000 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6002 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6007 x
= gen_reg_rtx (mode
);
6008 emit_insn (gen_spu_convert (x
, r
));
6013 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6015 HOST_WIDE_INT v
= 0;
6017 /* Check the range of immediate operands. */
6018 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6020 int range
= p
- SPU_BTI_7
;
6022 if (!CONSTANT_P (op
))
6023 error ("%s expects an integer literal in the range [%d, %d].",
6025 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6027 if (GET_CODE (op
) == CONST
6028 && (GET_CODE (XEXP (op
, 0)) == PLUS
6029 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6031 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6032 op
= XEXP (XEXP (op
, 0), 0);
6034 else if (GET_CODE (op
) == CONST_INT
)
6036 else if (GET_CODE (op
) == CONST_VECTOR
6037 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6038 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6040 /* The default for v is 0 which is valid in every range. */
6041 if (v
< spu_builtin_range
[range
].low
6042 || v
> spu_builtin_range
[range
].high
)
6043 error ("%s expects an integer literal in the range [%d, %d]. ("
6044 HOST_WIDE_INT_PRINT_DEC
")",
6046 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6055 /* This is only used in lqa, and stqa. Even though the insns
6056 encode 16 bits of the address (all but the 2 least
6057 significant), only 14 bits are used because it is masked to
6058 be 16 byte aligned. */
6062 /* This is used for lqr and stqr. */
6069 if (GET_CODE (op
) == LABEL_REF
6070 || (GET_CODE (op
) == SYMBOL_REF
6071 && SYMBOL_REF_FUNCTION_P (op
))
6072 || (v
& ((1 << lsbits
) - 1)) != 0)
6073 warning (0, "%d least significant bits of %s are ignored.", lsbits
,
6080 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6081 rtx target
, rtx ops
[])
6083 enum insn_code icode
= (enum insn_code
) d
->icode
;
6086 /* Expand the arguments into rtl. */
6088 if (d
->parm
[0] != SPU_BTI_VOID
)
6091 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6093 tree arg
= CALL_EXPR_ARG (exp
, a
);
6096 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6099 /* The insn pattern may have additional operands (SCRATCH).
6100 Return the number of actual non-SCRATCH operands. */
6101 gcc_assert (i
<= insn_data
[icode
].n_operands
);
6106 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6107 tree exp
, rtx target
)
6111 enum insn_code icode
= (enum insn_code
) d
->icode
;
6112 enum machine_mode mode
, tmode
;
6117 /* Set up ops[] with values from arglist. */
6118 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6120 /* Handle the target operand which must be operand 0. */
6122 if (d
->parm
[0] != SPU_BTI_VOID
)
6125 /* We prefer the mode specified for the match_operand otherwise
6126 use the mode from the builtin function prototype. */
6127 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6128 if (tmode
== VOIDmode
)
6129 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6131 /* Try to use target because not using it can lead to extra copies
6132 and when we are using all of the registers extra copies leads
6134 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6137 target
= ops
[0] = gen_reg_rtx (tmode
);
6139 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6145 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6147 enum machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6152 arg
= CALL_EXPR_ARG (exp
, 0);
6153 gcc_assert (TREE_CODE (TREE_TYPE (arg
)) == POINTER_TYPE
);
6154 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6155 addr
= memory_address (mode
, op
);
6158 op
= gen_reg_rtx (GET_MODE (addr
));
6159 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6160 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6161 op
= gen_rtx_MEM (mode
, op
);
6163 pat
= GEN_FCN (icode
) (target
, op
);
6170 /* Ignore align_hint, but still expand it's args in case they have
6172 if (icode
== CODE_FOR_spu_align_hint
)
6175 /* Handle the rest of the operands. */
6176 for (p
= 1; i
< n_operands
; i
++, p
++)
6178 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6179 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6181 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6183 /* mode can be VOIDmode here for labels */
6185 /* For specific intrinsics with an immediate operand, e.g.,
6186 si_ai(), we sometimes need to convert the scalar argument to a
6187 vector argument by splatting the scalar. */
6188 if (VECTOR_MODE_P (mode
)
6189 && (GET_CODE (ops
[i
]) == CONST_INT
6190 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6191 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6193 if (GET_CODE (ops
[i
]) == CONST_INT
)
6194 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6197 rtx reg
= gen_reg_rtx (mode
);
6198 enum machine_mode imode
= GET_MODE_INNER (mode
);
6199 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6200 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6201 if (imode
!= GET_MODE (ops
[i
]))
6202 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6203 TYPE_UNSIGNED (spu_builtin_types
6205 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6210 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6212 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6213 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6219 pat
= GEN_FCN (icode
) (0);
6222 pat
= GEN_FCN (icode
) (ops
[0]);
6225 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6228 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6231 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6234 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6237 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6246 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6247 emit_call_insn (pat
);
6248 else if (d
->type
== B_JUMP
)
6250 emit_jump_insn (pat
);
6256 return_type
= spu_builtin_types
[d
->parm
[0]];
6257 if (d
->parm
[0] != SPU_BTI_VOID
6258 && GET_MODE (target
) != TYPE_MODE (return_type
))
6260 /* target is the return value. It should always be the mode of
6261 the builtin function prototype. */
6262 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6269 spu_expand_builtin (tree exp
,
6271 rtx subtarget ATTRIBUTE_UNUSED
,
6272 enum machine_mode mode ATTRIBUTE_UNUSED
,
6273 int ignore ATTRIBUTE_UNUSED
)
6275 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6276 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
) - END_BUILTINS
;
6277 struct spu_builtin_description
*d
;
6279 if (fcode
< NUM_SPU_BUILTINS
)
6281 d
= &spu_builtins
[fcode
];
6283 return spu_expand_builtin_1 (d
, exp
, target
);
6288 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6290 spu_builtin_mul_widen_even (tree type
)
6292 switch (TYPE_MODE (type
))
6295 if (TYPE_UNSIGNED (type
))
6296 return spu_builtins
[SPU_MULE_0
].fndecl
;
6298 return spu_builtins
[SPU_MULE_1
].fndecl
;
6305 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6307 spu_builtin_mul_widen_odd (tree type
)
6309 switch (TYPE_MODE (type
))
6312 if (TYPE_UNSIGNED (type
))
6313 return spu_builtins
[SPU_MULO_1
].fndecl
;
6315 return spu_builtins
[SPU_MULO_0
].fndecl
;
6322 /* Implement targetm.vectorize.builtin_mask_for_load. */
6324 spu_builtin_mask_for_load (void)
6326 struct spu_builtin_description
*d
= &spu_builtins
[SPU_MASK_FOR_LOAD
];
6331 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6333 spu_builtin_vectorization_cost (bool runtime_test
)
6335 /* If the branch of the runtime test is taken - i.e. - the vectorized
6336 version is skipped - this incurs a misprediction cost (because the
6337 vectorized version is expected to be the fall-through). So we subtract
6338 the latency of a mispredicted branch from the costs that are incurred
6339 when the vectorized version is executed. */
6346 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6347 after applying N number of iterations. This routine does not determine
6348 how may iterations are required to reach desired alignment. */
6351 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6356 /* All other types are naturally aligned. */
6360 /* Implement targetm.vectorize.builtin_vec_perm. */
6362 spu_builtin_vec_perm (tree type
, tree
*mask_element_type
)
6364 struct spu_builtin_description
*d
;
6366 *mask_element_type
= unsigned_char_type_node
;
6368 switch (TYPE_MODE (type
))
6371 if (TYPE_UNSIGNED (type
))
6372 d
= &spu_builtins
[SPU_SHUFFLE_0
];
6374 d
= &spu_builtins
[SPU_SHUFFLE_1
];
6378 if (TYPE_UNSIGNED (type
))
6379 d
= &spu_builtins
[SPU_SHUFFLE_2
];
6381 d
= &spu_builtins
[SPU_SHUFFLE_3
];
6385 if (TYPE_UNSIGNED (type
))
6386 d
= &spu_builtins
[SPU_SHUFFLE_4
];
6388 d
= &spu_builtins
[SPU_SHUFFLE_5
];
6392 if (TYPE_UNSIGNED (type
))
6393 d
= &spu_builtins
[SPU_SHUFFLE_6
];
6395 d
= &spu_builtins
[SPU_SHUFFLE_7
];
6399 d
= &spu_builtins
[SPU_SHUFFLE_8
];
6403 d
= &spu_builtins
[SPU_SHUFFLE_9
];
6414 /* Count the total number of instructions in each pipe and return the
6415 maximum, which is used as the Minimum Iteration Interval (MII)
6416 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6417 -2 are instructions that can go in pipe0 or pipe1. */
6419 spu_sms_res_mii (struct ddg
*g
)
6422 unsigned t
[4] = {0, 0, 0, 0};
6424 for (i
= 0; i
< g
->num_nodes
; i
++)
6426 rtx insn
= g
->nodes
[i
].insn
;
6427 int p
= get_pipe (insn
) + 2;
6433 if (dump_file
&& INSN_P (insn
))
6434 fprintf (dump_file
, "i%d %s %d %d\n",
6436 insn_data
[INSN_CODE(insn
)].name
,
6440 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
6442 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
6447 spu_init_expanders (void)
6452 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6453 frame_pointer_needed is true. We don't know that until we're
6454 expanding the prologue. */
6455 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6457 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6458 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6459 to be treated as aligned, so generate them here. */
6460 r0
= gen_reg_rtx (SImode
);
6461 r1
= gen_reg_rtx (SImode
);
6462 mark_reg_pointer (r0
, 128);
6463 mark_reg_pointer (r1
, 128);
6464 gcc_assert (REGNO (r0
) == LAST_VIRTUAL_REGISTER
+ 1
6465 && REGNO (r1
) == LAST_VIRTUAL_REGISTER
+ 2);
6469 static enum machine_mode
6470 spu_libgcc_cmp_return_mode (void)
6473 /* For SPU word mode is TI mode so it is better to use SImode
6474 for compare returns. */
6478 static enum machine_mode
6479 spu_libgcc_shift_count_mode (void)
6481 /* For SPU word mode is TI mode so it is better to use SImode
6482 for shift counts. */
6486 /* An early place to adjust some flags after GCC has finished processing
6489 asm_file_start (void)
6491 /* Variable tracking should be run after all optimizations which
6492 change order of insns. It also needs a valid CFG. */
6493 spu_flag_var_tracking
= flag_var_tracking
;
6494 flag_var_tracking
= 0;
6496 default_file_start ();
6499 /* Implement targetm.section_type_flags. */
6501 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6503 /* .toe needs to have type @nobits. */
6504 if (strcmp (name
, ".toe") == 0)
6506 return default_section_type_flags (decl
, name
, reloc
);
6509 /* Generate a constant or register which contains 2^SCALE. We assume
6510 the result is valid for MODE. Currently, MODE must be V4SFmode and
6511 SCALE must be SImode. */
6513 spu_gen_exp2 (enum machine_mode mode
, rtx scale
)
6515 gcc_assert (mode
== V4SFmode
);
6516 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6517 if (GET_CODE (scale
) != CONST_INT
)
6519 /* unsigned int exp = (127 + scale) << 23;
6520 __vector float m = (__vector float) spu_splats (exp); */
6521 rtx reg
= force_reg (SImode
, scale
);
6522 rtx exp
= gen_reg_rtx (SImode
);
6523 rtx mul
= gen_reg_rtx (mode
);
6524 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6525 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
6526 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
6531 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6532 unsigned char arr
[16];
6533 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
6534 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6535 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6536 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
6537 return array_to_constant (mode
, arr
);
6541 /* After reload, just change the convert into a move instruction
6542 or a dead instruction. */
6544 spu_split_convert (rtx ops
[])
6546 if (REGNO (ops
[0]) == REGNO (ops
[1]))
6547 emit_note (NOTE_INSN_DELETED
);
6550 /* Use TImode always as this might help hard reg copyprop. */
6551 rtx op0
= gen_rtx_REG (TImode
, REGNO (ops
[0]));
6552 rtx op1
= gen_rtx_REG (TImode
, REGNO (ops
[1]));
6553 emit_insn (gen_move_insn (op0
, op1
));