1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
54 #include "tm-constrs.h"
/* NOTE(review): this file appears to be a heavily truncated extract of
   GCC's SPU target backend (spu.c).  Many original lines -- including
   brace-only lines -- are missing throughout, so no block here is
   compilable as-is.  Code tokens are left byte-identical; only
   comments are added.  */
60 /* Builtin types, data and prototypes. */
/* Index enum for the spu_builtin_types[] array below.  Most of the
   enumerators are missing from this extract; SPU_BTI_MAX (used as the
   array bound) is presumably the final enumerator -- TODO confirm.  */
62 enum spu_builtin_type_index
64 SPU_BTI_END_OF_PARAMS
,
66 /* We create new type nodes for these. */
78 /* A 16-byte type. (Implemented with V16QI_type_node) */
81 /* These all correspond to intSI_type_node */
95 /* These correspond to the standard types */
/* Convenience accessors into spu_builtin_types[] by SPU_BTI_* index.  */
115 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
116 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
117 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
118 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
119 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
120 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
121 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
122 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
123 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
124 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
/* GC-rooted (GTY) array of tree nodes for the builtin types above.  */
126 static GTY(()) tree spu_builtin_types
[SPU_BTI_MAX
];
128 struct spu_builtin_range
/* Inclusive [low, high] ranges accepted for SPU builtin immediate
   operands, one entry per SPU_BTI_* range kind (named in the trailing
   comments).  NOTE(review): the struct body, and the array's closing
   "};", are missing from this extract.  */
133 static struct spu_builtin_range spu_builtin_range
[] = {
134 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
135 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
136 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
137 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
138 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
139 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
140 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
141 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
142 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
143 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
144 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
145 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
149 /* Target specific attribute specifications. */
/* One flag per hard register.  Presumably records registers that have
   been allocated at some point -- the uses are not visible in this
   extract; TODO confirm against the rest of spu.c.  */
150 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
152 /* Prototypes and external defs. */
/* Forward declarations for the static helpers and target-hook
   implementations defined later in this file.  NOTE(review): several
   prototypes below are visibly truncated mid-parameter-list (lines
   missing from this extract).  */
153 static void spu_init_builtins (void);
154 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode
);
155 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode
);
156 static rtx
adjust_operand (rtx op
, HOST_WIDE_INT
* start
);
157 static rtx
get_pic_reg (void);
158 static int need_to_save_reg (int regno
, int saving
);
159 static rtx
frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
);
160 static rtx
frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
);
161 static rtx
frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
,
163 static void emit_nop_for_insn (rtx insn
);
164 static bool insn_clobbers_hbr (rtx insn
);
165 static void spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
166 int distance
, sbitmap blocks
);
167 static rtx
spu_emit_vector_compare (enum rtx_code rcode
, rtx op0
, rtx op1
,
168 enum machine_mode dmode
);
169 static rtx
get_branch_target (rtx branch
);
170 static void spu_machine_dependent_reorg (void);
171 static int spu_sched_issue_rate (void);
172 static int spu_sched_variable_issue (FILE * dump
, int verbose
, rtx insn
,
174 static int get_pipe (rtx insn
);
175 static int spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
);
176 static void spu_sched_init_global (FILE *, int, int);
177 static void spu_sched_init (FILE *, int, int);
178 static int spu_sched_reorder (FILE *, int, rtx
*, int *, int);
179 static tree
spu_handle_fndecl_attribute (tree
* node
, tree name
, tree args
,
181 unsigned char *no_add_attrs
);
182 static tree
spu_handle_vector_attribute (tree
* node
, tree name
, tree args
,
184 unsigned char *no_add_attrs
);
185 static int spu_naked_function_p (tree func
);
186 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
187 const_tree type
, unsigned char named
);
188 static tree
spu_build_builtin_va_list (void);
189 static void spu_va_start (tree
, rtx
);
190 static tree
spu_gimplify_va_arg_expr (tree valist
, tree type
,
191 gimple_seq
* pre_p
, gimple_seq
* post_p
);
192 static int regno_aligned_for_load (int regno
);
193 static int store_with_one_insn_p (rtx mem
);
194 static int mem_is_padded_component_ref (rtx x
);
195 static bool spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
);
196 static void spu_asm_globalize_label (FILE * file
, const char *name
);
197 static unsigned char spu_rtx_costs (rtx x
, int code
, int outer_code
,
198 int *total
, bool speed
);
199 static unsigned char spu_function_ok_for_sibcall (tree decl
, tree exp
);
200 static void spu_init_libfuncs (void);
201 static bool spu_return_in_memory (const_tree type
, const_tree fntype
);
202 static void fix_range (const char *);
203 static void spu_encode_section_info (tree
, rtx
, int);
204 static rtx
spu_legitimize_address (rtx
, rtx
, enum machine_mode
);
205 static tree
spu_builtin_mul_widen_even (tree
);
206 static tree
spu_builtin_mul_widen_odd (tree
);
207 static tree
spu_builtin_mask_for_load (void);
208 static int spu_builtin_vectorization_cost (bool);
209 static bool spu_vector_alignment_reachable (const_tree
, bool);
210 static tree
spu_builtin_vec_perm (tree
, tree
*);
211 static int spu_sms_res_mii (struct ddg
*g
);
212 static void asm_file_start (void);
213 static unsigned int spu_section_type_flags (tree
, const char *, int);
215 extern const char *reg_names
[];
/* Operands of the pending comparison; read and rewritten by
   spu_emit_branch_or_set () later in this file.  */
216 rtx spu_compare_op0
, spu_compare_op1
;
218 /* Which instruction set architecture to use. */
220 /* Which cpu are we tuning for. */
223 /* The hardware requires 8 insns between a hint and the branch it
224 effects. This variable describes how many rtl instructions the
225 compiler needs to see before inserting a hint, and then the compiler
226 will insert enough nops to make it at least 8 insns. The default is
227 for the compiler to allow up to 2 nops be emitted. The nops are
228 inserted in pairs, so we round down. */
229 int spu_hint_dist
= (8*4) - (2*4);
231 /* Determines whether we run variable tracking in machine dependent
233 static int spu_flag_var_tracking
;
/* Classification of how a constant can be materialized on SPU.
   NOTE(review): the enum header line ("enum immediate_class {", per the
   classify_immediate prototype below) is missing from this extract.  */
248 IC_POOL
, /* constant pool */
249 IC_IL1
, /* one il* instruction */
250 IC_IL2
, /* both ilhu and iohl instructions */
251 IC_IL1s
, /* one il* instruction */
252 IC_IL2s
, /* both ilhu and iohl instructions */
253 IC_FSMBI
, /* the fsmbi instruction */
254 IC_CPAT
, /* one of the c*d instructions */
255 IC_FSMBI2
/* fsmbi plus 1 other instruction */
/* Helpers that pick the immediate-load / logical-immediate / c*d-pattern
   encoding for a constant value.  */
258 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
259 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
260 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
261 static enum immediate_class
classify_immediate (rtx op
,
262 enum machine_mode mode
);
264 static enum machine_mode
spu_unwind_word_mode (void);
266 static enum machine_mode
267 spu_libgcc_cmp_return_mode (void);
269 static enum machine_mode
270 spu_libgcc_shift_count_mode (void);
273 /* TARGET overrides. */
/* Wire this backend's implementations into the targetm hook vector
   (see the targetm definition below).  Each #undef clears the default
   from target-def.h before redefining it.  */
275 #undef TARGET_INIT_BUILTINS
276 #define TARGET_INIT_BUILTINS spu_init_builtins
278 #undef TARGET_EXPAND_BUILTIN
279 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
281 #undef TARGET_UNWIND_WORD_MODE
282 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
284 #undef TARGET_LEGITIMIZE_ADDRESS
285 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
287 /* The .8byte directive doesn't seem to work well for a 32 bit
289 #undef TARGET_ASM_UNALIGNED_DI_OP
290 #define TARGET_ASM_UNALIGNED_DI_OP NULL
292 #undef TARGET_RTX_COSTS
293 #define TARGET_RTX_COSTS spu_rtx_costs
295 #undef TARGET_ADDRESS_COST
296 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
298 #undef TARGET_SCHED_ISSUE_RATE
299 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
301 #undef TARGET_SCHED_INIT_GLOBAL
302 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
304 #undef TARGET_SCHED_INIT
305 #define TARGET_SCHED_INIT spu_sched_init
307 #undef TARGET_SCHED_VARIABLE_ISSUE
308 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
310 #undef TARGET_SCHED_REORDER
311 #define TARGET_SCHED_REORDER spu_sched_reorder
313 #undef TARGET_SCHED_REORDER2
314 #define TARGET_SCHED_REORDER2 spu_sched_reorder
316 #undef TARGET_SCHED_ADJUST_COST
317 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
319 const struct attribute_spec spu_attribute_table
[];
320 #undef TARGET_ATTRIBUTE_TABLE
321 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
323 #undef TARGET_ASM_INTEGER
324 #define TARGET_ASM_INTEGER spu_assemble_integer
326 #undef TARGET_SCALAR_MODE_SUPPORTED_P
327 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
329 #undef TARGET_VECTOR_MODE_SUPPORTED_P
330 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
332 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
333 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
335 #undef TARGET_ASM_GLOBALIZE_LABEL
336 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
338 #undef TARGET_PASS_BY_REFERENCE
339 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
341 #undef TARGET_MUST_PASS_IN_STACK
342 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
344 #undef TARGET_BUILD_BUILTIN_VA_LIST
345 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
347 #undef TARGET_EXPAND_BUILTIN_VA_START
348 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
350 #undef TARGET_SETUP_INCOMING_VARARGS
351 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
353 #undef TARGET_MACHINE_DEPENDENT_REORG
354 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
356 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
357 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
359 #undef TARGET_DEFAULT_TARGET_FLAGS
360 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
362 #undef TARGET_INIT_LIBFUNCS
363 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
365 #undef TARGET_RETURN_IN_MEMORY
366 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
368 #undef TARGET_ENCODE_SECTION_INFO
369 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
371 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
372 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
374 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
375 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
377 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
378 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
380 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
381 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
383 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
384 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
386 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
387 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
389 #undef TARGET_LIBGCC_CMP_RETURN_MODE
390 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
392 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
393 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
395 #undef TARGET_SCHED_SMS_RES_MII
396 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
398 #undef TARGET_ASM_FILE_START
399 #define TARGET_ASM_FILE_START asm_file_start
401 #undef TARGET_SECTION_TYPE_FLAGS
402 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
/* The target hook vector, initialized from the TARGET_* macros
   defined earlier in this file.  */
404 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* OPTIMIZATION_OPTIONS hook: tune optimizer defaults for SPU.  With a
   large register file, a longer scheduler pending list and register
   renaming pay off by default.  NOTE(review): the return type line and
   braces are missing from this extract.  */
407 spu_optimization_options (int level ATTRIBUTE_UNUSED
, int size ATTRIBUTE_UNUSED
)
409 /* Override some of the default param values. With so many registers
410 larger values are better for these params. */
411 MAX_PENDING_LIST_LENGTH
= 128;
413 /* With so many registers this is better on by default. */
414 flag_rename_registers
= 1;
417 /* Sometimes certain combinations of command options do not make sense
418 on a particular target machine. You can define a macro
419 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
420 executed once just after all the command options have been parsed. */
/* Validates -march=/-mtune= strings, clamps hint distance, fixes
   register ranges, and adjusts per-arch defaults.  NOTE(review):
   several statements (e.g. the align_functions assignment and the
   spu_hint_dist clamp body) are missing from this extract.  */
422 spu_override_options (void)
424 /* Small loops will be unpeeled at -O3. For SPU it is more important
425 to keep code small by default. */
426 if (!flag_unroll_loops
&& !flag_peel_loops
427 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
428 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
) = 1;
430 flag_omit_frame_pointer
= 1;
432 /* Functions must be 8 byte aligned so we correctly handle dual issue */
433 if (align_functions
< 8)
436 spu_hint_dist
= 8*4 - spu_max_nops
*4;
437 if (spu_hint_dist
< 0)
440 if (spu_fixed_range_string
)
441 fix_range (spu_fixed_range_string
);
443 /* Determine processor architectural level. */
446 if (strcmp (&spu_arch_string
[0], "cell") == 0)
447 spu_arch
= PROCESSOR_CELL
;
448 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
449 spu_arch
= PROCESSOR_CELLEDP
;
451 error ("Unknown architecture '%s'", &spu_arch_string
[0]);
454 /* Determine processor to tune for. */
457 if (strcmp (&spu_tune_string
[0], "cell") == 0)
458 spu_tune
= PROCESSOR_CELL
;
459 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
460 spu_tune
= PROCESSOR_CELLEDP
;
462 error ("Unknown architecture '%s'", &spu_tune_string
[0]);
465 /* Change defaults according to the processor architecture. */
466 if (spu_arch
== PROCESSOR_CELLEDP
)
468 /* If no command line option has been otherwise specified, change
469 the default to -mno-safe-hints on celledp -- only the original
470 Cell/B.E. processors require this workaround. */
471 if (!(target_flags_explicit
& MASK_SAFE_HINTS
))
472 target_flags
&= ~MASK_SAFE_HINTS
;
475 REAL_MODE_FORMAT (SFmode
) = &spu_single_format
;
478 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
479 struct attribute_spec.handler. */
481 /* Table of machine attributes. */
/* "naked" applies to function decls (handled by
   spu_handle_fndecl_attribute); "spu_vector" applies to types (handled
   by spu_handle_vector_attribute).  The all-NULL row terminates the
   table.  NOTE(review): the opening brace and closing "};" of the
   initializer are missing from this extract.  */
482 const struct attribute_spec spu_attribute_table
[] =
484 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
485 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
},
486 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
},
487 { NULL
, 0, 0, false, false, false, NULL
}
490 /* True if MODE is valid for the target. By "valid", we mean able to
491 be manipulated in non-trivial ways. In particular, this means all
492 the arithmetic is supported. */
/* NOTE(review): only the signature survives in this extract; the
   entire function body is missing.  */
494 spu_scalar_mode_supported_p (enum machine_mode mode
)
512 /* Similarly for vector modes. "Supported" here is less strict. At
513 least some operations are supported; need to check optabs or builtins
514 for further details. */
/* NOTE(review): only the signature survives in this extract; the
   entire function body is missing.  */
516 spu_vector_mode_supported_p (enum machine_mode mode
)
533 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
534 least significant bytes of the outer mode. This function returns
535 TRUE for the SUBREG's where this is correct. */
/* Accepts the SUBREG when inner and outer modes are the same size, or
   both fit in a word (<= 4 bytes), or both are at least a full
   quadword (>= 16 bytes).  NOTE(review): return-type line and braces
   are missing from this extract.  */
537 valid_subreg (rtx op
)
539 enum machine_mode om
= GET_MODE (op
);
540 enum machine_mode im
= GET_MODE (SUBREG_REG (op
));
541 return om
!= VOIDmode
&& im
!= VOIDmode
542 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
543 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4)
544 || (GET_MODE_SIZE (im
) >= 16 && GET_MODE_SIZE (om
) >= 16));
547 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
548 and adjust the start offset. */
/* OP is the operand rtx; *START (may be updated) is the bit offset of
   the field.  Returns the adjusted operand (return statement missing
   from this extract).  */
550 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
552 enum machine_mode mode
;
554 /* Strip any paradoxical SUBREG. */
555 if (GET_CODE (op
) == SUBREG
556 && (GET_MODE_BITSIZE (GET_MODE (op
))
557 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)))))
/* Account for the bits the paradoxical SUBREG added on the left.  */
561 GET_MODE_BITSIZE (GET_MODE (op
)) -
562 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
563 op
= SUBREG_REG (op
);
565 /* If it is smaller than SI, assure a SUBREG */
566 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
/* Sub-word operand: shift the bit offset so the field is addressed
   within a full 32-bit word.  */
570 *start
+= 32 - op_size
;
573 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
574 mode
= mode_for_size (op_size
, MODE_INT
, 0);
575 if (mode
!= GET_MODE (op
))
576 op
= gen_rtx_SUBREG (mode
, op
, 0);
/* Expand a bit-field extract (extv/extzv).  ops[0] = destination,
   ops[1] = source, ops[2] = field width, ops[3] = start bit;
   UNSIGNEDP chooses zero- vs sign-extension.  Strategy: left-shift the
   field to the most-significant end, then shift right (logical when
   UNSIGNEDP, arithmetic otherwise) by src_size - width.
   NOTE(review): braces, the switch scaffolding around the per-mode
   shift emissions, and other lines are missing from this extract.  */
581 spu_expand_extv (rtx ops
[], int unsignedp
)
583 HOST_WIDE_INT width
= INTVAL (ops
[2]);
584 HOST_WIDE_INT start
= INTVAL (ops
[3]);
585 HOST_WIDE_INT src_size
, dst_size
;
586 enum machine_mode src_mode
, dst_mode
;
587 rtx dst
= ops
[0], src
= ops
[1];
590 dst
= adjust_operand (ops
[0], 0);
591 dst_mode
= GET_MODE (dst
);
592 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
594 src
= adjust_operand (src
, &start
);
595 src_mode
= GET_MODE (src
);
596 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
/* Shift the field to the top of the word (per-mode variants).  */
600 s
= gen_reg_rtx (src_mode
);
604 emit_insn (gen_ashlsi3 (s
, src
, GEN_INT (start
)));
607 emit_insn (gen_ashldi3 (s
, src
, GEN_INT (start
)));
610 emit_insn (gen_ashlti3 (s
, src
, GEN_INT (start
)));
/* Narrow field: shift back down with the sign-appropriate shift.  */
618 if (width
< src_size
)
625 icode
= unsignedp
? CODE_FOR_lshrsi3
: CODE_FOR_ashrsi3
;
628 icode
= unsignedp
? CODE_FOR_lshrdi3
: CODE_FOR_ashrdi3
;
631 icode
= unsignedp
? CODE_FOR_lshrti3
: CODE_FOR_ashrti3
;
636 s
= gen_reg_rtx (src_mode
);
637 pat
= GEN_FCN (icode
) (s
, src
, GEN_INT (src_size
- width
));
/* Move (and extend/truncate) the extracted value into DST.  */
642 convert_move (dst
, src
, unsignedp
);
/* Expand a bit-field insert (insv).  ops[0] = destination (register or
   MEM), ops[1] = field width, ops[2] = start bit, ops[3] = source.
   Builds a mask covering the field, aligns the source under it with a
   left shift, and merges with selb.  A MEM destination is handled as a
   read-modify-write of the containing aligned quadword(s).
   NOTE(review): braces, switch scaffolding and several statements are
   missing from this extract.  */
646 spu_expand_insv (rtx ops
[])
648 HOST_WIDE_INT width
= INTVAL (ops
[1]);
649 HOST_WIDE_INT start
= INTVAL (ops
[2]);
650 HOST_WIDE_INT maskbits
;
651 enum machine_mode dst_mode
, src_mode
;
652 rtx dst
= ops
[0], src
= ops
[3];
653 int dst_size
, src_size
;
/* For a MEM destination, build the merged value in a TImode temp.  */
659 if (GET_CODE (ops
[0]) == MEM
)
660 dst
= gen_reg_rtx (TImode
);
662 dst
= adjust_operand (dst
, &start
)
;
663 dst_mode
= GET_MODE (dst
);
664 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
/* Force a constant source into a register of the right width.  */
666 if (CONSTANT_P (src
))
668 enum machine_mode m
=
669 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
670 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
672 src
= adjust_operand (src
, 0);
673 src_mode
= GET_MODE (src
);
674 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
/* Position the source bits under the field.  */
676 mask
= gen_reg_rtx (dst_mode
);
677 shift_reg
= gen_reg_rtx (dst_mode
);
678 shift
= dst_size
- start
- width
;
680 /* It's not safe to use subreg here because the compiler assumes
681 that the SUBREG_REG is right justified in the SUBREG. */
682 convert_move (shift_reg
, src
, 1);
689 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
692 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
695 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
/* Compute the field mask: 32/64-bit cases arithmetically, the TImode
   case via a per-byte array.  */
707 maskbits
= (-1ll << (32 - width
- start
));
709 maskbits
+= (1ll << (32 - start
));
710 emit_move_insn (mask
, GEN_INT (maskbits
));
713 maskbits
= (-1ll << (64 - width
- start
));
715 maskbits
+= (1ll << (64 - start
));
716 emit_move_insn (mask
, GEN_INT (maskbits
));
720 unsigned char arr
[16];
722 memset (arr
, 0, sizeof (arr
));
723 arr
[i
] = 0xff >> (start
& 7);
724 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
726 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
727 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
/* MEM destination: rotate value and mask to the byte offset within the
   16-byte-aligned quadword, then read/selb/write; when the field
   crosses into the next quadword, repeat for the following one.  */
733 if (GET_CODE (ops
[0]) == MEM
)
735 rtx aligned
= gen_reg_rtx (SImode
);
736 rtx low
= gen_reg_rtx (SImode
);
737 rtx addr
= gen_reg_rtx (SImode
);
738 rtx rotl
= gen_reg_rtx (SImode
);
739 rtx mask0
= gen_reg_rtx (TImode
);
742 emit_move_insn (addr
, XEXP (ops
[0], 0));
743 emit_insn (gen_andsi3 (aligned
, addr
, GEN_INT (-16)));
744 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
745 emit_insn (gen_negsi2 (rotl
, low
));
746 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
747 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
748 mem
= change_address (ops
[0], TImode
, aligned
);
749 set_mem_alias_set (mem
, 0);
750 emit_move_insn (dst
, mem
);
751 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
752 emit_move_insn (mem
, dst
);
753 if (start
+ width
> MEM_ALIGN (ops
[0]))
755 rtx shl
= gen_reg_rtx (SImode
);
756 rtx mask1
= gen_reg_rtx (TImode
);
757 rtx dst1
= gen_reg_rtx (TImode
);
759 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
760 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
761 mem1
= adjust_address (mem
, TImode
, 16);
762 set_mem_alias_set (mem1
, 0);
763 emit_move_insn (dst1
, mem1
);
764 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
765 emit_move_insn (mem1
, dst1
);
/* Register destination: merge the shifted source into DST under MASK.  */
769 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
/* Expand a block move (movmem).  ops[0]/ops[1] = dest/src MEMs,
   ops[2] = byte count, ops[3] = alignment.  Only handles a small
   constant byte count; copies 16 bytes at a time with V16QI moves,
   then uses a selb with a partial byte mask for the tail so bytes past
   the copy are preserved.  NOTE(review): braces, the early-return for
   the unsupported case, and the tail-mask fill loop body are missing
   from this extract.  */
774 spu_expand_block_move (rtx ops
[])
776 HOST_WIDE_INT bytes
, align
, offset
;
777 rtx src
, dst
, sreg
, dreg
, target
;
/* Bail out unless both count and alignment are constants and the count
   is small enough to expand inline.  */
779 if (GET_CODE (ops
[2]) != CONST_INT
780 || GET_CODE (ops
[3]) != CONST_INT
781 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
784 bytes
= INTVAL (ops
[2]);
785 align
= INTVAL (ops
[3]);
/* Whole 16-byte chunks.  */
795 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
797 dst
= adjust_address (ops
[0], V16QImode
, offset
);
798 src
= adjust_address (ops
[1], V16QImode
, offset
);
799 emit_move_insn (dst
, src
);
/* Remaining tail: selb source bytes over destination bytes using a
   mask with 0xff in the copied positions.  */
804 unsigned char arr
[16] = { 0 };
805 for (i
= 0; i
< bytes
- offset
; i
++)
807 dst
= adjust_address (ops
[0], V16QImode
, offset
);
808 src
= adjust_address (ops
[1], V16QImode
, offset
);
809 mask
= gen_reg_rtx (V16QImode
);
810 sreg
= gen_reg_rtx (V16QImode
);
811 dreg
= gen_reg_rtx (V16QImode
);
812 target
= gen_reg_rtx (V16QImode
);
813 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
814 emit_move_insn (dreg
, dst
);
815 emit_move_insn (sreg
, src
);
816 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
817 emit_move_insn (dst
, target
);
/* Comparison kinds the SPU has native instructions for (equality,
   signed greater-than, unsigned greater-than).  NOTE(review): the enum
   header line ("enum spu_comp_code", per the scode/eq_code declarations
   in spu_emit_branch_or_set) is missing from this extract.  */
825 { SPU_EQ
, SPU_GT
, SPU_GTU
};
/* Instruction codes for each (mode, comparison) pair, rows ordered
   QI, HI, SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF; a 0 entry
   means no unsigned compare exists for that (floating-point) mode.
   NOTE(review): the closing "};" is missing from this extract.  */
827 int spu_comp_icode
[12][3] = {
828 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
829 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
830 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
831 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
832 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
833 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
834 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
835 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
836 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
837 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
838 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
839 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
842 /* Generate a compare for CODE. Return a brand-new rtx that represents
843 the result of the compare. GCC can figure this out too if we don't
844 provide all variations of compares, but GCC always wants to use
845 WORD_MODE, we can generate better code in most cases if we do it
/* IS_SET selects the expansion: 0 emits a conditional branch to
   operands[0], 2 emits a selb-based conditional select between
   operands[2]/operands[3], otherwise the comparison result is stored
   into operands[0].  Reads and rewrites the globals spu_compare_op0/1.
   NOTE(review): braces, switch scaffolding and many statements are
   missing from this extract.  */
848 spu_emit_branch_or_set (int is_set
, enum rtx_code code
, rtx operands
[])
850 int reverse_compare
= 0;
851 int reverse_test
= 0;
852 rtx compare_result
, eq_result
;
853 rtx comp_rtx
, eq_rtx
;
854 rtx target
= operands
[0];
855 enum machine_mode comp_mode
;
856 enum machine_mode op_mode
;
857 enum spu_comp_code scode
, eq_code
;
858 enum insn_code ior_code
;
862 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
863 and so on, to keep the constant in operand 1. */
864 if (GET_CODE (spu_compare_op1
) == CONST_INT
)
866 HOST_WIDE_INT val
= INTVAL (spu_compare_op1
) - 1;
867 if (trunc_int_for_mode (val
, GET_MODE (spu_compare_op0
)) == val
)
871 spu_compare_op1
= GEN_INT (val
);
875 spu_compare_op1
= GEN_INT (val
);
879 spu_compare_op1
= GEN_INT (val
);
883 spu_compare_op1
= GEN_INT (val
);
892 op_mode
= GET_MODE (spu_compare_op0
);
/* Map CODE to an SPU comparison (scode) plus reversal flags; NaN
   honoring affects the float cases.  */
898 if (HONOR_NANS (op_mode
))
913 if (HONOR_NANS (op_mode
))
/* Choose the mode the comparison result is computed in.  */
1001 comp_mode
= op_mode
;
1005 comp_mode
= V4SImode
;
1009 comp_mode
= V2DImode
;
1016 if (GET_MODE (spu_compare_op1
) == DFmode
1017 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
/* Branching on (SI/HI == 0): no compare needed, just invert the test
   and branch on the operand itself.  */
1020 if (is_set
== 0 && spu_compare_op1
== const0_rtx
1021 && (GET_MODE (spu_compare_op0
) == SImode
1022 || GET_MODE (spu_compare_op0
) == HImode
) && scode
== SPU_EQ
)
1024 /* Don't need to set a register with the result when we are
1025 comparing against zero and branching. */
1026 reverse_test
= !reverse_test
;
1027 compare_result
= spu_compare_op0
;
1031 compare_result
= gen_reg_rtx (comp_mode
);
1033 if (reverse_compare
)
1035 rtx t
= spu_compare_op1
;
1036 spu_compare_op1
= spu_compare_op0
;
1037 spu_compare_op0
= t
;
1040 if (spu_comp_icode
[index
][scode
] == 0)
/* Legitimize both operands against the chosen insn's predicates.  */
1043 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
1044 (spu_compare_op0
, op_mode
))
1045 spu_compare_op0
= force_reg (op_mode
, spu_compare_op0
);
1046 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
1047 (spu_compare_op1
, op_mode
))
1048 spu_compare_op1
= force_reg (op_mode
, spu_compare_op1
);
1049 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
1054 emit_insn (comp_rtx
);
/* For >=-style tests built from GT plus EQ, OR the two results.  */
1058 eq_result
= gen_reg_rtx (comp_mode
);
1059 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
1065 ior_code
= ior_optab
->handlers
[(int)comp_mode
].insn_code
;
1066 gcc_assert (ior_code
!= CODE_FOR_nothing
);
1067 emit_insn (GEN_FCN (ior_code
)
1068 (compare_result
, compare_result
, eq_result
));
1077 /* We don't have branch on QI compare insns, so we convert the
1078 QI compare result to a HI result. */
1079 if (comp_mode
== QImode
)
1081 rtx old_res
= compare_result
;
1082 compare_result
= gen_reg_rtx (HImode
);
1084 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
/* is_set == 0: emit the conditional branch to TARGET (a label).  */
1088 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
1090 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
1092 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, target
);
1093 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
1094 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
/* is_set == 2: conditional select via selb.  */
1097 else if (is_set
== 2)
1099 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
1100 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
1101 enum machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
1103 rtx op_t
= operands
[2];
1104 rtx op_f
= operands
[3];
1106 /* The result of the comparison can be SI, HI or QI mode. Create a
1107 mask based on that result. */
1108 if (target_size
> compare_size
)
1110 select_mask
= gen_reg_rtx (mode
);
1111 emit_insn (gen_extend_compare (select_mask
, compare_result
));
1113 else if (target_size
< compare_size
)
1115 gen_rtx_SUBREG (mode
, compare_result
,
1116 (compare_size
- target_size
) / BITS_PER_UNIT
);
1117 else if (comp_mode
!= mode
)
1118 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
1120 select_mask
= compare_result
;
1122 if (GET_MODE (target
) != GET_MODE (op_t
)
1123 || GET_MODE (target
) != GET_MODE (op_f
))
/* The selb operand order encodes whether the test was reversed.  */
1127 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
1129 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
/* Otherwise: store the (possibly inverted, possibly extended)
   comparison result into TARGET.  */
1134 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
1135 gen_rtx_NOT (comp_mode
, compare_result
)));
1136 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
1137 emit_insn (gen_extendhisi2 (target
, compare_result
));
1138 else if (GET_MODE (target
) == SImode
1139 && GET_MODE (compare_result
) == QImode
)
1140 emit_insn (gen_extend_compare (target
, compare_result
));
1142 emit_move_insn (target
, compare_result
);
/* Convert a CONST_DOUBLE in SFmode or DFmode to its target bit pattern
   returned as a HOST_WIDE_INT (the two 32-bit halves of a double are
   packed high:low).  NOTE(review): return type, locals and braces are
   missing from this extract.  */
1147 const_double_to_hwint (rtx x
)
1151 if (GET_MODE (x
) == SFmode
)
1153 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1154 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1156 else if (GET_MODE (x
) == DFmode
)
1159 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1160 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
/* Pack the two 32-bit target words into one HOST_WIDE_INT.  */
1162 val
= (val
<< 32) | (l
[1] & 0xffffffff);
/* Inverse of const_double_to_hwint: build a CONST_DOUBLE of MODE
   (SFmode or DFmode only, asserted) from the bit pattern V.  The
   (v << 32) >> 32 idiom sign-extends the low 32 bits into a target
   word.  NOTE(review): return type, locals, braces and the SFmode
   branch header are missing from this extract.  */
1170 hwint_to_const_double (enum machine_mode mode
, HOST_WIDE_INT v
)
1174 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1177 tv
[0] = (v
<< 32) >> 32;
1178 else if (mode
== DFmode
)
1180 tv
[1] = (v
<< 32) >> 32;
1183 real_from_target (&rv
, tv
, mode
);
1184 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
/* Output assembler syntax for an address operand ADDR to FILE.
   Handles REG ("0(reg)"), PLUS of reg+reg ("ra,rb") or reg+const
   ("d(reg)"), and falls back to output_addr_const for symbolic
   addresses.  NOTE(review): return type, braces, case labels and the
   default/abort path are missing from this extract.  */
1188 print_operand_address (FILE * file
, register rtx addr
)
/* Strip the (and X -16) 16-byte alignment wrapper.  */
1193 if (GET_CODE (addr
) == AND
1194 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1195 && INTVAL (XEXP (addr
, 1)) == -16)
1196 addr
= XEXP (addr
, 0);
1198 switch (GET_CODE (addr
))
1201 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1205 reg
= XEXP (addr
, 0);
1206 offset
= XEXP (addr
, 1);
1207 if (GET_CODE (offset
) == REG
)
1209 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1210 reg_names
[REGNO (offset
)]);
1212 else if (GET_CODE (offset
) == CONST_INT
)
1214 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1215 INTVAL (offset
), reg_names
[REGNO (reg
)]);
/* Symbolic address fallback.  */
1225 output_addr_const (file
, addr
);
1235 print_operand (FILE * file
, rtx x
, int code
)
1237 enum machine_mode mode
= GET_MODE (x
);
1239 unsigned char arr
[16];
1240 int xcode
= GET_CODE (x
);
1242 if (GET_MODE (x
) == VOIDmode
)
1245 case 'L': /* 128 bits, signed */
1246 case 'm': /* 128 bits, signed */
1247 case 'T': /* 128 bits, signed */
1248 case 't': /* 128 bits, signed */
1251 case 'K': /* 64 bits, signed */
1252 case 'k': /* 64 bits, signed */
1253 case 'D': /* 64 bits, signed */
1254 case 'd': /* 64 bits, signed */
1257 case 'J': /* 32 bits, signed */
1258 case 'j': /* 32 bits, signed */
1259 case 's': /* 32 bits, signed */
1260 case 'S': /* 32 bits, signed */
1267 case 'j': /* 32 bits, signed */
1268 case 'k': /* 64 bits, signed */
1269 case 'm': /* 128 bits, signed */
1270 if (xcode
== CONST_INT
1271 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1273 gcc_assert (logical_immediate_p (x
, mode
));
1274 constant_to_array (mode
, x
, arr
);
1275 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1276 val
= trunc_int_for_mode (val
, SImode
);
1277 switch (which_logical_immediate (val
))
1282 fprintf (file
, "h");
1285 fprintf (file
, "b");
1295 case 'J': /* 32 bits, signed */
1296 case 'K': /* 64 bits, signed */
1297 case 'L': /* 128 bits, signed */
1298 if (xcode
== CONST_INT
1299 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1301 gcc_assert (logical_immediate_p (x
, mode
)
1302 || iohl_immediate_p (x
, mode
));
1303 constant_to_array (mode
, x
, arr
);
1304 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1305 val
= trunc_int_for_mode (val
, SImode
);
1306 switch (which_logical_immediate (val
))
1312 val
= trunc_int_for_mode (val
, HImode
);
1315 val
= trunc_int_for_mode (val
, QImode
);
1320 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1326 case 't': /* 128 bits, signed */
1327 case 'd': /* 64 bits, signed */
1328 case 's': /* 32 bits, signed */
1331 enum immediate_class c
= classify_immediate (x
, mode
);
1335 constant_to_array (mode
, x
, arr
);
1336 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1337 val
= trunc_int_for_mode (val
, SImode
);
1338 switch (which_immediate_load (val
))
1343 fprintf (file
, "a");
1346 fprintf (file
, "h");
1349 fprintf (file
, "hu");
1356 constant_to_array (mode
, x
, arr
);
1357 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1359 fprintf (file
, "b");
1361 fprintf (file
, "h");
1363 fprintf (file
, "w");
1365 fprintf (file
, "d");
1368 if (xcode
== CONST_VECTOR
)
1370 x
= CONST_VECTOR_ELT (x
, 0);
1371 xcode
= GET_CODE (x
);
1373 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1374 fprintf (file
, "a");
1375 else if (xcode
== HIGH
)
1376 fprintf (file
, "hu");
1390 case 'T': /* 128 bits, signed */
1391 case 'D': /* 64 bits, signed */
1392 case 'S': /* 32 bits, signed */
1395 enum immediate_class c
= classify_immediate (x
, mode
);
1399 constant_to_array (mode
, x
, arr
);
1400 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1401 val
= trunc_int_for_mode (val
, SImode
);
1402 switch (which_immediate_load (val
))
1409 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1414 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1417 constant_to_array (mode
, x
, arr
);
1419 for (i
= 0; i
< 16; i
++)
1424 print_operand (file
, GEN_INT (val
), 0);
1427 constant_to_array (mode
, x
, arr
);
1428 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1429 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1434 if (GET_CODE (x
) == CONST_VECTOR
)
1435 x
= CONST_VECTOR_ELT (x
, 0);
1436 output_addr_const (file
, x
);
1438 fprintf (file
, "@h");
1452 if (xcode
== CONST_INT
)
1454 /* Only 4 least significant bits are relevant for generate
1455 control word instructions. */
1456 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1461 case 'M': /* print code for c*d */
1462 if (GET_CODE (x
) == CONST_INT
)
1466 fprintf (file
, "b");
1469 fprintf (file
, "h");
1472 fprintf (file
, "w");
1475 fprintf (file
, "d");
1484 case 'N': /* Negate the operand */
1485 if (xcode
== CONST_INT
)
1486 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1487 else if (xcode
== CONST_VECTOR
)
1488 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1489 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1492 case 'I': /* enable/disable interrupts */
1493 if (xcode
== CONST_INT
)
1494 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1497 case 'b': /* branch modifiers */
1499 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1500 else if (COMPARISON_P (x
))
1501 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1504 case 'i': /* indirect call */
1507 if (GET_CODE (XEXP (x
, 0)) == REG
)
1508 /* Used in indirect function calls. */
1509 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1511 output_address (XEXP (x
, 0));
1515 case 'p': /* load/store */
1519 xcode
= GET_CODE (x
);
1524 xcode
= GET_CODE (x
);
1527 fprintf (file
, "d");
1528 else if (xcode
== CONST_INT
)
1529 fprintf (file
, "a");
1530 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1531 fprintf (file
, "r");
1532 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1534 if (GET_CODE (XEXP (x
, 1)) == REG
)
1535 fprintf (file
, "x");
1537 fprintf (file
, "d");
1542 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1544 output_addr_const (file
, GEN_INT (val
));
1548 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1550 output_addr_const (file
, GEN_INT (val
));
1554 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1556 output_addr_const (file
, GEN_INT (val
));
1560 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1561 val
= (val
>> 3) & 0x1f;
1562 output_addr_const (file
, GEN_INT (val
));
1566 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1569 output_addr_const (file
, GEN_INT (val
));
1573 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1576 output_addr_const (file
, GEN_INT (val
));
1580 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1583 output_addr_const (file
, GEN_INT (val
));
1587 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1588 val
= -(val
& -8ll);
1589 val
= (val
>> 3) & 0x1f;
1590 output_addr_const (file
, GEN_INT (val
));
1595 constant_to_array (mode
, x
, arr
);
1596 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1597 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1602 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1603 else if (xcode
== MEM
)
1604 output_address (XEXP (x
, 0));
1605 else if (xcode
== CONST_VECTOR
)
1606 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1608 output_addr_const (file
, x
);
1615 output_operand_lossage ("invalid %%xn code");
1620 extern char call_used_regs
[];
1622 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1623 caller saved register. For leaf functions it is more efficient to
1624 use a volatile register because we won't need to save and restore the
1625 pic register. This routine is only valid after register allocation
1626 is completed, so we can pick an unused register. */
1630 rtx pic_reg
= pic_offset_table_rtx
;
1631 if (!reload_completed
&& !reload_in_progress
)
1636 /* Split constant addresses to handle cases that are too large.
1637 Add in the pic register when in PIC mode.
1638 Split immediates that require more than 1 instruction. */
1640 spu_split_immediate (rtx
* ops
)
1642 enum machine_mode mode
= GET_MODE (ops
[0]);
1643 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1649 unsigned char arrhi
[16];
1650 unsigned char arrlo
[16];
1651 rtx to
, temp
, hi
, lo
;
1653 enum machine_mode imode
= mode
;
1654 /* We need to do reals as ints because the constant used in the
1655 IOR might not be a legitimate real constant. */
1656 imode
= int_mode_for_mode (mode
);
1657 constant_to_array (mode
, ops
[1], arrhi
);
1659 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1662 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1663 for (i
= 0; i
< 16; i
+= 4)
1665 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1666 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1667 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1668 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1670 hi
= array_to_constant (imode
, arrhi
);
1671 lo
= array_to_constant (imode
, arrlo
);
1672 emit_move_insn (temp
, hi
);
1673 emit_insn (gen_rtx_SET
1674 (VOIDmode
, to
, gen_rtx_IOR (imode
, temp
, lo
)));
1679 unsigned char arr_fsmbi
[16];
1680 unsigned char arr_andbi
[16];
1681 rtx to
, reg_fsmbi
, reg_and
;
1683 enum machine_mode imode
= mode
;
1684 /* We need to do reals as ints because the constant used in the
1685 * AND might not be a legitimate real constant. */
1686 imode
= int_mode_for_mode (mode
);
1687 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1689 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1692 for (i
= 0; i
< 16; i
++)
1693 if (arr_fsmbi
[i
] != 0)
1695 arr_andbi
[0] = arr_fsmbi
[i
];
1696 arr_fsmbi
[i
] = 0xff;
1698 for (i
= 1; i
< 16; i
++)
1699 arr_andbi
[i
] = arr_andbi
[0];
1700 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1701 reg_and
= array_to_constant (imode
, arr_andbi
);
1702 emit_move_insn (to
, reg_fsmbi
);
1703 emit_insn (gen_rtx_SET
1704 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1708 if (reload_in_progress
|| reload_completed
)
1710 rtx mem
= force_const_mem (mode
, ops
[1]);
1711 if (TARGET_LARGE_MEM
)
1713 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1714 emit_move_insn (addr
, XEXP (mem
, 0));
1715 mem
= replace_equiv_address (mem
, addr
);
1717 emit_move_insn (ops
[0], mem
);
1723 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1727 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1728 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1731 emit_insn (gen_pic (ops
[0], ops
[1]));
1734 rtx pic_reg
= get_pic_reg ();
1735 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1736 crtl
->uses_pic_offset_table
= 1;
1738 return flag_pic
|| c
== IC_IL2s
;
1749 /* SAVING is TRUE when we are generating the actual load and store
1750 instructions for REGNO. When determining the size of the stack
1751 needed for saving register we must allocate enough space for the
1752 worst case, because we don't always have the information early enough
1753 to not allocate it. But we can at least eliminate the actual loads
1754 and stores during the prologue/epilogue. */
1756 need_to_save_reg (int regno
, int saving
)
1758 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1761 && regno
== PIC_OFFSET_TABLE_REGNUM
1762 && (!saving
|| crtl
->uses_pic_offset_table
)
1764 || !current_function_is_leaf
|| df_regs_ever_live_p (LAST_ARG_REGNUM
)))
1769 /* This function is only correct starting with local register
1772 spu_saved_regs_size (void)
1774 int reg_save_size
= 0;
1777 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1778 if (need_to_save_reg (regno
, 0))
1779 reg_save_size
+= 0x10;
1780 return reg_save_size
;
1784 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1786 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1788 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1789 return emit_insn (gen_movv4si (mem
, reg
));
1793 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1795 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1797 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1798 return emit_insn (gen_movv4si (reg
, mem
));
1801 /* This happens after reload, so we need to expand it. */
1803 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1806 if (satisfies_constraint_K (GEN_INT (imm
)))
1808 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1812 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1813 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1814 if (REGNO (src
) == REGNO (scratch
))
1820 /* Return nonzero if this function is known to have a null epilogue. */
1823 direct_return (void)
1825 if (reload_completed
)
1827 if (cfun
->static_chain_decl
== 0
1828 && (spu_saved_regs_size ()
1830 + crtl
->outgoing_args_size
1831 + crtl
->args
.pretend_args_size
== 0)
1832 && current_function_is_leaf
)
1839 The stack frame looks like this:
1843 AP -> +-------------+
1846 prev SP | back chain |
1849 | reg save | crtl->args.pretend_args_size bytes
1852 | saved regs | spu_saved_regs_size() bytes
1853 FP -> +-------------+
1855 | vars | get_frame_size() bytes
1856 HFP -> +-------------+
1859 | args | crtl->outgoing_args_size bytes
1865 SP -> +-------------+
1869 spu_expand_prologue (void)
1871 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1872 HOST_WIDE_INT total_size
;
1873 HOST_WIDE_INT saved_regs_size
;
1874 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1875 rtx scratch_reg_0
, scratch_reg_1
;
1878 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1879 the "toplevel" insn chain. */
1880 emit_note (NOTE_INSN_DELETED
);
1882 if (flag_pic
&& optimize
== 0)
1883 crtl
->uses_pic_offset_table
= 1;
1885 if (spu_naked_function_p (current_function_decl
))
1888 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1889 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1891 saved_regs_size
= spu_saved_regs_size ();
1892 total_size
= size
+ saved_regs_size
1893 + crtl
->outgoing_args_size
1894 + crtl
->args
.pretend_args_size
;
1896 if (!current_function_is_leaf
1897 || cfun
->calls_alloca
|| total_size
> 0)
1898 total_size
+= STACK_POINTER_OFFSET
;
1900 /* Save this first because code after this might use the link
1901 register as a scratch register. */
1902 if (!current_function_is_leaf
)
1904 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1905 RTX_FRAME_RELATED_P (insn
) = 1;
1910 offset
= -crtl
->args
.pretend_args_size
;
1911 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1912 if (need_to_save_reg (regno
, 1))
1915 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1916 RTX_FRAME_RELATED_P (insn
) = 1;
1920 if (flag_pic
&& crtl
->uses_pic_offset_table
)
1922 rtx pic_reg
= get_pic_reg ();
1923 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1924 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1929 if (flag_stack_check
)
1931 /* We compare against total_size-1 because
1932 ($sp >= total_size) <=> ($sp > total_size-1) */
1933 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1934 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1935 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1936 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1938 emit_move_insn (scratch_v4si
, size_v4si
);
1939 size_v4si
= scratch_v4si
;
1941 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1942 emit_insn (gen_vec_extractv4si
1943 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1944 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1947 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1948 the value of the previous $sp because we save it as the back
1950 if (total_size
<= 2000)
1952 /* In this case we save the back chain first. */
1953 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1955 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1959 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1961 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1963 RTX_FRAME_RELATED_P (insn
) = 1;
1964 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1965 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1967 if (total_size
> 2000)
1969 /* Save the back chain ptr */
1970 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1973 if (frame_pointer_needed
)
1975 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1976 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1977 + crtl
->outgoing_args_size
;
1978 /* Set the new frame_pointer */
1979 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1980 RTX_FRAME_RELATED_P (insn
) = 1;
1981 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1982 add_reg_note (insn
, REG_FRAME_RELATED_EXPR
, real
);
1983 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1987 emit_note (NOTE_INSN_DELETED
);
1991 spu_expand_epilogue (bool sibcall_p
)
1993 int size
= get_frame_size (), offset
, regno
;
1994 HOST_WIDE_INT saved_regs_size
, total_size
;
1995 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1996 rtx jump
, scratch_reg_0
;
1998 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1999 the "toplevel" insn chain. */
2000 emit_note (NOTE_INSN_DELETED
);
2002 if (spu_naked_function_p (current_function_decl
))
2005 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
2007 saved_regs_size
= spu_saved_regs_size ();
2008 total_size
= size
+ saved_regs_size
2009 + crtl
->outgoing_args_size
2010 + crtl
->args
.pretend_args_size
;
2012 if (!current_function_is_leaf
2013 || cfun
->calls_alloca
|| total_size
> 0)
2014 total_size
+= STACK_POINTER_OFFSET
;
2018 if (cfun
->calls_alloca
)
2019 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
2021 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
2024 if (saved_regs_size
> 0)
2026 offset
= -crtl
->args
.pretend_args_size
;
2027 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
2028 if (need_to_save_reg (regno
, 1))
2031 frame_emit_load (regno
, sp_reg
, offset
);
2036 if (!current_function_is_leaf
)
2037 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
2041 emit_use (gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
));
2042 jump
= emit_jump_insn (gen__return ());
2043 emit_barrier_after (jump
);
2046 emit_note (NOTE_INSN_DELETED
);
2050 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
2054 /* This is inefficient because it ends up copying to a save-register
2055 which then gets saved even though $lr has already been saved. But
2056 it does generate better code for leaf functions and we don't need
2057 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2058 used for __builtin_return_address anyway, so maybe we don't care if
2059 it's inefficient. */
2060 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
2064 /* Given VAL, generate a constant appropriate for MODE.
2065 If MODE is a vector mode, every element will be VAL.
2066 For TImode, VAL will be zero extended to 128 bits. */
2068 spu_const (enum machine_mode mode
, HOST_WIDE_INT val
)
2074 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
2075 || GET_MODE_CLASS (mode
) == MODE_FLOAT
2076 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
2077 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
2079 if (GET_MODE_CLASS (mode
) == MODE_INT
)
2080 return immed_double_const (val
, 0, mode
);
2082 /* val is the bit representation of the float */
2083 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
2084 return hwint_to_const_double (mode
, val
);
2086 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
2087 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
2089 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
2091 units
= GET_MODE_NUNITS (mode
);
2093 v
= rtvec_alloc (units
);
2095 for (i
= 0; i
< units
; ++i
)
2096 RTVEC_ELT (v
, i
) = inner
;
2098 return gen_rtx_CONST_VECTOR (mode
, v
);
2101 /* Create a MODE vector constant from 4 ints. */
2103 spu_const_from_ints(enum machine_mode mode
, int a
, int b
, int c
, int d
)
2105 unsigned char arr
[16];
2106 arr
[0] = (a
>> 24) & 0xff;
2107 arr
[1] = (a
>> 16) & 0xff;
2108 arr
[2] = (a
>> 8) & 0xff;
2109 arr
[3] = (a
>> 0) & 0xff;
2110 arr
[4] = (b
>> 24) & 0xff;
2111 arr
[5] = (b
>> 16) & 0xff;
2112 arr
[6] = (b
>> 8) & 0xff;
2113 arr
[7] = (b
>> 0) & 0xff;
2114 arr
[8] = (c
>> 24) & 0xff;
2115 arr
[9] = (c
>> 16) & 0xff;
2116 arr
[10] = (c
>> 8) & 0xff;
2117 arr
[11] = (c
>> 0) & 0xff;
2118 arr
[12] = (d
>> 24) & 0xff;
2119 arr
[13] = (d
>> 16) & 0xff;
2120 arr
[14] = (d
>> 8) & 0xff;
2121 arr
[15] = (d
>> 0) & 0xff;
2122 return array_to_constant(mode
, arr
);
2125 /* branch hint stuff */
2127 /* An array of these is used to propagate hints to predecessor blocks. */
2130 rtx prop_jump
; /* propagated from another block */
2131 int bb_index
; /* the original block. */
2133 static struct spu_bb_info
*spu_bb_info
;
2135 #define STOP_HINT_P(INSN) \
2136 (GET_CODE(INSN) == CALL_INSN \
2137 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2138 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2140 /* 1 when RTX is a hinted branch or its target. We keep track of
2141 what has been hinted so the safe-hint code can test it easily. */
2142 #define HINTED_P(RTX) \
2143 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2145 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2146 #define SCHED_ON_EVEN_P(RTX) \
2147 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2149 /* Emit a nop for INSN such that the two will dual issue. This assumes
2150 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2151 We check for TImode to handle a MULTI1 insn which has dual issued its
2152 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2155 emit_nop_for_insn (rtx insn
)
2159 p
= get_pipe (insn
);
2160 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2161 new_insn
= emit_insn_after (gen_lnop (), insn
);
2162 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2164 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2165 PUT_MODE (new_insn
, TImode
);
2166 PUT_MODE (insn
, VOIDmode
);
2169 new_insn
= emit_insn_after (gen_lnop (), insn
);
2170 recog_memoized (new_insn
);
2173 /* Insert nops in basic blocks to meet dual issue alignment
2174 requirements. Also make sure hbrp and hint instructions are at least
2175 one cycle apart, possibly inserting a nop. */
2179 rtx insn
, next_insn
, prev_insn
, hbr_insn
= 0;
2183 /* This sets up INSN_ADDRESSES. */
2184 shorten_branches (get_insns ());
2186 /* Keep track of length added by nops. */
2190 insn
= get_insns ();
2191 if (!active_insn_p (insn
))
2192 insn
= next_active_insn (insn
);
2193 for (; insn
; insn
= next_insn
)
2195 next_insn
= next_active_insn (insn
);
2196 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2197 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2201 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2202 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2203 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2206 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2207 PUT_MODE (prev_insn
, GET_MODE (insn
));
2208 PUT_MODE (insn
, TImode
);
2214 if (INSN_CODE (insn
) == CODE_FOR_blockage
)
2216 if (GET_MODE (insn
) == TImode
)
2217 PUT_MODE (next_insn
, TImode
);
2219 next_insn
= next_active_insn (insn
);
2221 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2222 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2224 if (((addr
+ length
) & 7) != 0)
2226 emit_nop_for_insn (prev_insn
);
2230 else if (GET_MODE (insn
) == TImode
2231 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2232 || get_attr_type (insn
) == TYPE_MULTI0
)
2233 && ((addr
+ length
) & 7) != 0)
2235 /* prev_insn will always be set because the first insn is
2236 always 8-byte aligned. */
2237 emit_nop_for_insn (prev_insn
);
2245 /* Routines for branch hints. */
2248 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
2249 int distance
, sbitmap blocks
)
2251 rtx branch_label
= 0;
2256 if (before
== 0 || branch
== 0 || target
== 0)
2259 /* While scheduling we require hints to be no further than 600, so
2260 we need to enforce that here too */
2264 /* If we have a Basic block note, emit it after the basic block note. */
2265 if (NOTE_KIND (before
) == NOTE_INSN_BASIC_BLOCK
)
2266 before
= NEXT_INSN (before
);
2268 branch_label
= gen_label_rtx ();
2269 LABEL_NUSES (branch_label
)++;
2270 LABEL_PRESERVE_P (branch_label
) = 1;
2271 insn
= emit_label_before (branch_label
, branch
);
2272 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2273 SET_BIT (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2275 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2276 recog_memoized (hint
);
2277 HINTED_P (branch
) = 1;
2279 if (GET_CODE (target
) == LABEL_REF
)
2280 HINTED_P (XEXP (target
, 0)) = 1;
2281 else if (tablejump_p (branch
, 0, &table
))
2285 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2286 vec
= XVEC (PATTERN (table
), 0);
2288 vec
= XVEC (PATTERN (table
), 1);
2289 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2290 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2293 if (distance
>= 588)
2295 /* Make sure the hint isn't scheduled any earlier than this point,
2296 which could make it too far for the branch offest to fit */
2297 recog_memoized (emit_insn_before (gen_blockage (), hint
));
2299 else if (distance
<= 8 * 4)
2301 /* To guarantee at least 8 insns between the hint and branch we
2304 for (d
= distance
; d
< 8 * 4; d
+= 4)
2307 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2308 recog_memoized (insn
);
2311 /* Make sure any nops inserted aren't scheduled before the hint. */
2312 recog_memoized (emit_insn_after (gen_blockage (), hint
));
2314 /* Make sure any nops inserted aren't scheduled after the call. */
2315 if (CALL_P (branch
) && distance
< 8 * 4)
2316 recog_memoized (emit_insn_before (gen_blockage (), branch
));
2320 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2321 the rtx for the branch target. */
2323 get_branch_target (rtx branch
)
2325 if (GET_CODE (branch
) == JUMP_INSN
)
2329 /* Return statements */
2330 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2331 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2334 if (GET_CODE (PATTERN (branch
)) == ADDR_VEC
2335 || GET_CODE (PATTERN (branch
)) == ADDR_DIFF_VEC
)
2338 set
= single_set (branch
);
2339 src
= SET_SRC (set
);
2340 if (GET_CODE (SET_DEST (set
)) != PC
)
2343 if (GET_CODE (src
) == IF_THEN_ELSE
)
2346 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2349 /* If the more probable case is not a fall through, then
2350 try a branch hint. */
2351 HOST_WIDE_INT prob
= INTVAL (XEXP (note
, 0));
2352 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2353 && GET_CODE (XEXP (src
, 1)) != PC
)
2354 lab
= XEXP (src
, 1);
2355 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2356 && GET_CODE (XEXP (src
, 2)) != PC
)
2357 lab
= XEXP (src
, 2);
2361 if (GET_CODE (lab
) == RETURN
)
2362 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2370 else if (GET_CODE (branch
) == CALL_INSN
)
2373 /* All of our call patterns are in a PARALLEL and the CALL is
2374 the first pattern in the PARALLEL. */
2375 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2377 call
= XVECEXP (PATTERN (branch
), 0, 0);
2378 if (GET_CODE (call
) == SET
)
2379 call
= SET_SRC (call
);
2380 if (GET_CODE (call
) != CALL
)
2382 return XEXP (XEXP (call
, 0), 0);
2387 /* The special $hbr register is used to prevent the insn scheduler from
2388 moving hbr insns across instructions which invalidate them. It
2389 should only be used in a clobber, and this function searches for
2390 insns which clobber it. */
2392 insn_clobbers_hbr (rtx insn
)
2395 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2397 rtx parallel
= PATTERN (insn
);
2400 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2402 clobber
= XVECEXP (parallel
, 0, j
);
2403 if (GET_CODE (clobber
) == CLOBBER
2404 && GET_CODE (XEXP (clobber
, 0)) == REG
2405 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2412 /* Search up to 32 insns starting at FIRST:
2413 - at any kind of hinted branch, just return
2414 - at any unconditional branch in the first 15 insns, just return
2415 - at a call or indirect branch, after the first 15 insns, force it to
2416 an even address and return
2417 - at any unconditional branch, after the first 15 insns, force it to
2419 At the end of the search, insert an hbrp within 4 insns of FIRST,
2420 and an hbrp within 16 instructions of FIRST.
2423 insert_hbrp_for_ilb_runout (rtx first
)
2425 rtx insn
, before_4
= 0, before_16
= 0;
2426 int addr
= 0, length
, first_addr
= -1;
2427 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2428 int insert_lnop_after
= 0;
2429 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2432 if (first_addr
== -1)
2433 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2434 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2435 length
= get_attr_length (insn
);
2437 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2439 /* We test for 14 instructions because the first hbrp will add
2440 up to 2 instructions. */
2441 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2444 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2446 /* Make sure an hbrp is at least 2 cycles away from a hint.
2447 Insert an lnop after the hbrp when necessary. */
2448 if (before_4
== 0 && addr
> 0)
2451 insert_lnop_after
|= 1;
2453 else if (before_4
&& addr
<= 4 * 4)
2454 insert_lnop_after
|= 1;
2455 if (before_16
== 0 && addr
> 10 * 4)
2458 insert_lnop_after
|= 2;
2460 else if (before_16
&& addr
<= 14 * 4)
2461 insert_lnop_after
|= 2;
2464 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2466 if (addr
< hbrp_addr0
)
2468 else if (addr
< hbrp_addr1
)
2472 if (CALL_P (insn
) || JUMP_P (insn
))
2474 if (HINTED_P (insn
))
2477 /* Any branch after the first 15 insns should be on an even
2478 address to avoid a special case branch. There might be
2479 some nops and/or hbrps inserted, so we test after 10
2482 SCHED_ON_EVEN_P (insn
) = 1;
2485 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2489 if (addr
+ length
>= 32 * 4)
2491 gcc_assert (before_4
&& before_16
);
2492 if (hbrp_addr0
> 4 * 4)
2495 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2496 recog_memoized (insn
);
2497 INSN_ADDRESSES_NEW (insn
,
2498 INSN_ADDRESSES (INSN_UID (before_4
)));
2499 PUT_MODE (insn
, GET_MODE (before_4
));
2500 PUT_MODE (before_4
, TImode
);
2501 if (insert_lnop_after
& 1)
2503 insn
= emit_insn_before (gen_lnop (), before_4
);
2504 recog_memoized (insn
);
2505 INSN_ADDRESSES_NEW (insn
,
2506 INSN_ADDRESSES (INSN_UID (before_4
)));
2507 PUT_MODE (insn
, TImode
);
2510 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2511 && hbrp_addr1
> 16 * 4)
2514 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2515 recog_memoized (insn
);
2516 INSN_ADDRESSES_NEW (insn
,
2517 INSN_ADDRESSES (INSN_UID (before_16
)));
2518 PUT_MODE (insn
, GET_MODE (before_16
));
2519 PUT_MODE (before_16
, TImode
);
2520 if (insert_lnop_after
& 2)
2522 insn
= emit_insn_before (gen_lnop (), before_16
);
2523 recog_memoized (insn
);
2524 INSN_ADDRESSES_NEW (insn
,
2525 INSN_ADDRESSES (INSN_UID
2527 PUT_MODE (insn
, TImode
);
2533 else if (BARRIER_P (insn
))
2538 /* The SPU might hang when it executes 48 inline instructions after a
2539 hinted branch jumps to its hinted target. The beginning of a
2540 function and the return from a call might have been hinted, and must
2541 be handled as well. To prevent a hang we insert 2 hbrps. The first
2542 should be within 6 insns of the branch target. The second should be
2543 within 22 insns of the branch target. When determining if hbrps are
2544 necessary, we look for only 32 inline instructions, because up to
2545 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2546 new hbrps, we insert them within 4 and 16 insns of the target. */
2551 if (TARGET_SAFE_HINTS
)
2553 shorten_branches (get_insns ());
2554 /* Insert hbrp at beginning of function */
2555 insn
= next_active_insn (get_insns ());
2557 insert_hbrp_for_ilb_runout (insn
);
2558 /* Insert hbrp after hinted targets. */
2559 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2560 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2561 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2565 static int in_spu_reorg
;
2567 /* Insert branch hints. There are no branch optimizations after this
2568 pass, so it's safe to set our branch hints now. */
2570 spu_machine_dependent_reorg (void)
2575 rtx branch_target
= 0;
2576 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2580 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2582 /* We still do it for unoptimized code because an external
2583 function might have hinted a call or return. */
2589 blocks
= sbitmap_alloc (last_basic_block
);
2590 sbitmap_zero (blocks
);
2593 compute_bb_for_insn ();
2598 (struct spu_bb_info
*) xcalloc (n_basic_blocks
,
2599 sizeof (struct spu_bb_info
));
2601 /* We need exact insn addresses and lengths. */
2602 shorten_branches (get_insns ());
2604 for (i
= n_basic_blocks
- 1; i
>= 0; i
--)
2606 bb
= BASIC_BLOCK (i
);
2608 if (spu_bb_info
[i
].prop_jump
)
2610 branch
= spu_bb_info
[i
].prop_jump
;
2611 branch_target
= get_branch_target (branch
);
2612 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2613 required_dist
= spu_hint_dist
;
2615 /* Search from end of a block to beginning. In this loop, find
2616 jumps which need a branch and emit them only when:
2617 - it's an indirect branch and we're at the insn which sets
2619 - we're at an insn that will invalidate the hint. e.g., a
2620 call, another hint insn, inline asm that clobbers $hbr, and
2621 some inlined operations (divmodsi4). Don't consider jumps
2622 because they are only at the end of a block and are
2623 considered when we are deciding whether to propagate
2624 - we're getting too far away from the branch. The hbr insns
2625 only have a signed 10 bit offset
2626 We go back as far as possible so the branch will be considered
2627 for propagation when we get to the beginning of the block. */
2628 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2632 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2634 && ((GET_CODE (branch_target
) == REG
2635 && set_of (branch_target
, insn
) != NULL_RTX
)
2636 || insn_clobbers_hbr (insn
)
2637 || branch_addr
- insn_addr
> 600))
2639 rtx next
= NEXT_INSN (insn
);
2640 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2641 if (insn
!= BB_END (bb
)
2642 && branch_addr
- next_addr
>= required_dist
)
2646 "hint for %i in block %i before %i\n",
2647 INSN_UID (branch
), bb
->index
,
2649 spu_emit_branch_hint (next
, branch
, branch_target
,
2650 branch_addr
- next_addr
, blocks
);
2655 /* JUMP_P will only be true at the end of a block. When
2656 branch is already set it means we've previously decided
2657 to propagate a hint for that branch into this block. */
2658 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2661 if ((branch_target
= get_branch_target (insn
)))
2664 branch_addr
= insn_addr
;
2665 required_dist
= spu_hint_dist
;
2669 if (insn
== BB_HEAD (bb
))
2675 /* If we haven't emitted a hint for this branch yet, it might
2676 be profitable to emit it in one of the predecessor blocks,
2677 especially for loops. */
2679 basic_block prev
= 0, prop
= 0, prev2
= 0;
2680 int loop_exit
= 0, simple_loop
= 0;
2681 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2683 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2684 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2685 prev
= EDGE_PRED (bb
, j
)->src
;
2687 prev2
= EDGE_PRED (bb
, j
)->src
;
2689 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2690 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2692 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2695 /* If this branch is a loop exit then propagate to previous
2696 fallthru block. This catches the cases when it is a simple
2697 loop or when there is an initial branch into the loop. */
2698 if (prev
&& (loop_exit
|| simple_loop
)
2699 && prev
->loop_depth
<= bb
->loop_depth
)
2702 /* If there is only one adjacent predecessor. Don't propagate
2703 outside this loop. This loop_depth test isn't perfect, but
2704 I'm not sure the loop_father member is valid at this point. */
2705 else if (prev
&& single_pred_p (bb
)
2706 && prev
->loop_depth
== bb
->loop_depth
)
2709 /* If this is the JOIN block of a simple IF-THEN then
2710 propogate the hint to the HEADER block. */
2711 else if (prev
&& prev2
2712 && EDGE_COUNT (bb
->preds
) == 2
2713 && EDGE_COUNT (prev
->preds
) == 1
2714 && EDGE_PRED (prev
, 0)->src
== prev2
2715 && prev2
->loop_depth
== bb
->loop_depth
2716 && GET_CODE (branch_target
) != REG
)
2719 /* Don't propagate when:
2720 - this is a simple loop and the hint would be too far
2721 - this is not a simple loop and there are 16 insns in
2723 - the predecessor block ends in a branch that will be
2725 - the predecessor block ends in an insn that invalidates
2729 && (bbend
= BB_END (prop
))
2730 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2731 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2732 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2735 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2736 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2737 bb
->index
, prop
->index
, bb
->loop_depth
,
2738 INSN_UID (branch
), loop_exit
, simple_loop
,
2739 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2741 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2742 spu_bb_info
[prop
->index
].bb_index
= i
;
2744 else if (branch_addr
- next_addr
>= required_dist
)
2747 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2748 INSN_UID (branch
), bb
->index
,
2749 INSN_UID (NEXT_INSN (insn
)));
2750 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2751 branch_addr
- next_addr
, blocks
);
2758 if (!sbitmap_empty_p (blocks
))
2759 find_many_sub_basic_blocks (blocks
);
2761 /* We have to schedule to make sure alignment is ok. */
2762 FOR_EACH_BB (bb
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2764 /* The hints need to be scheduled, so call it again. */
2771 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2772 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2774 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2775 between its branch label and the branch . We don't move the
2776 label because GCC expects it at the beginning of the block. */
2777 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2778 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2779 rtx label
= XEXP (label_ref
, 0);
2782 for (branch
= NEXT_INSN (label
);
2783 !JUMP_P (branch
) && !CALL_P (branch
);
2784 branch
= NEXT_INSN (branch
))
2785 if (NONJUMP_INSN_P (branch
))
2786 offset
+= get_attr_length (branch
);
2788 XVECEXP (unspec
, 0, 0) = plus_constant (label_ref
, offset
);
2791 if (spu_flag_var_tracking
)
2794 timevar_push (TV_VAR_TRACKING
);
2795 variable_tracking_main ();
2796 timevar_pop (TV_VAR_TRACKING
);
2797 df_finish_pass (false);
2800 free_bb_for_insn ();
/* Insn scheduling routines, primarily for dual issue. */

/* Implement TARGET_SCHED_ISSUE_RATE: the SPU can dual-issue, so the
   scheduler may issue up to two insns per cycle.
   NOTE(review): body reconstructed from a corrupted dump -- verify.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}
2814 uses_ls_unit(rtx insn
)
2816 rtx set
= single_set (insn
);
2818 && (GET_CODE (SET_DEST (set
)) == MEM
2819 || GET_CODE (SET_SRC (set
)) == MEM
))
2828 /* Handle inline asm */
2829 if (INSN_CODE (insn
) == -1)
2831 t
= get_attr_type (insn
);
2856 case TYPE_IPREFETCH
:
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates it's clock_var between the two calls. */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled. */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder(). */
static int pipe0_clock;
static int pipe1_clock;

/* The clock cycle of the previously issued insn; equal clocks mean the
   two insns dual-issued.  */
static int prev_clock_var;

/* INSN_PRIORITY of the most recently issued insn; spu_sched_reorder
   compares against it when deciding whether a nop is worth issuing.  */
static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb. */
static int spu_ls_first;
static int prev_ls_clock;
2893 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2894 int max_ready ATTRIBUTE_UNUSED
)
2896 spu_sched_length
= 0;
2900 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2901 int max_ready ATTRIBUTE_UNUSED
)
2903 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
2905 /* When any block might be at least 8-byte aligned, assume they
2906 will all be at least 8-byte aligned to make sure dual issue
2907 works out correctly. */
2908 spu_sched_length
= 0;
2910 spu_ls_first
= INT_MAX
;
2915 prev_clock_var
= -1;
2920 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
2921 int verbose ATTRIBUTE_UNUSED
, rtx insn
, int more
)
2925 if (GET_CODE (PATTERN (insn
)) == USE
2926 || GET_CODE (PATTERN (insn
)) == CLOBBER
2927 || (len
= get_attr_length (insn
)) == 0)
2930 spu_sched_length
+= len
;
2932 /* Reset on inline asm */
2933 if (INSN_CODE (insn
) == -1)
2935 spu_ls_first
= INT_MAX
;
2940 p
= get_pipe (insn
);
2942 pipe0_clock
= clock_var
;
2944 pipe1_clock
= clock_var
;
2948 if (clock_var
- prev_ls_clock
> 1
2949 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2950 spu_ls_first
= INT_MAX
;
2951 if (uses_ls_unit (insn
))
2953 if (spu_ls_first
== INT_MAX
)
2954 spu_ls_first
= spu_sched_length
;
2955 prev_ls_clock
= clock_var
;
2958 /* The scheduler hasn't inserted the nop, but we will later on.
2959 Include those nops in spu_sched_length. */
2960 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
2961 spu_sched_length
+= 4;
2962 prev_clock_var
= clock_var
;
2964 /* more is -1 when called from spu_sched_reorder for new insns
2965 that don't have INSN_PRIORITY */
2967 prev_priority
= INSN_PRIORITY (insn
);
2970 /* Always try issueing more insns. spu_sched_reorder will decide
2971 when the cycle should be advanced. */
2975 /* This function is called for both TARGET_SCHED_REORDER and
2976 TARGET_SCHED_REORDER2. */
2978 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
2979 rtx
*ready
, int *nreadyp
, int clock
)
2981 int i
, nready
= *nreadyp
;
2982 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
2987 if (nready
<= 0 || pipe1_clock
>= clock
)
2990 /* Find any rtl insns that don't generate assembly insns and schedule
2992 for (i
= nready
- 1; i
>= 0; i
--)
2995 if (INSN_CODE (insn
) == -1
2996 || INSN_CODE (insn
) == CODE_FOR_blockage
2997 || INSN_CODE (insn
) == CODE_FOR__spu_convert
)
2999 ready
[i
] = ready
[nready
- 1];
3000 ready
[nready
- 1] = insn
;
3005 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
3006 for (i
= 0; i
< nready
; i
++)
3007 if (INSN_CODE (ready
[i
]) != -1)
3010 switch (get_attr_type (insn
))
3035 case TYPE_IPREFETCH
:
3041 /* In the first scheduling phase, schedule loads and stores together
3042 to increase the chance they will get merged during postreload CSE. */
3043 if (!reload_completed
&& pipe_ls
>= 0)
3045 insn
= ready
[pipe_ls
];
3046 ready
[pipe_ls
] = ready
[nready
- 1];
3047 ready
[nready
- 1] = insn
;
3051 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3055 /* When we have loads/stores in every cycle of the last 15 insns and
3056 we are about to schedule another load/store, emit an hbrp insn
3059 && spu_sched_length
- spu_ls_first
>= 4 * 15
3060 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
3062 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3063 recog_memoized (insn
);
3064 if (pipe0_clock
< clock
)
3065 PUT_MODE (insn
, TImode
);
3066 spu_sched_variable_issue (file
, verbose
, insn
, -1);
3070 /* In general, we want to emit nops to increase dual issue, but dual
3071 issue isn't faster when one of the insns could be scheduled later
3072 without effecting the critical path. We look at INSN_PRIORITY to
3073 make a good guess, but it isn't perfect so -mdual-nops=n can be
3074 used to effect it. */
3075 if (in_spu_reorg
&& spu_dual_nops
< 10)
3077 /* When we are at an even address and we are not issueing nops to
3078 improve scheduling then we need to advance the cycle. */
3079 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
3080 && (spu_dual_nops
== 0
3083 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
3086 /* When at an odd address, schedule the highest priority insn
3087 without considering pipeline. */
3088 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
3089 && (spu_dual_nops
== 0
3091 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
3096 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3097 pipe0 insn in the ready list, schedule it. */
3098 if (pipe0_clock
< clock
&& pipe_0
>= 0)
3099 schedule_i
= pipe_0
;
3101 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3102 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3104 schedule_i
= pipe_1
;
3106 if (schedule_i
> -1)
3108 insn
= ready
[schedule_i
];
3109 ready
[schedule_i
] = ready
[nready
- 1];
3110 ready
[nready
- 1] = insn
;
3116 /* INSN is dependent on DEP_INSN. */
3118 spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
3122 /* The blockage pattern is used to prevent instructions from being
3123 moved across it and has no cost. */
3124 if (INSN_CODE (insn
) == CODE_FOR_blockage
3125 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3128 if (INSN_CODE (insn
) == CODE_FOR__spu_convert
3129 || INSN_CODE (dep_insn
) == CODE_FOR__spu_convert
)
3132 /* Make sure hbrps are spread out. */
3133 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3134 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3137 /* Make sure hints and hbrps are 2 cycles apart. */
3138 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3139 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3140 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3141 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3144 /* An hbrp has no real dependency on other insns. */
3145 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3146 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3149 /* Assuming that it is unlikely an argument register will be used in
3150 the first cycle of the called function, we reduce the cost for
3151 slightly better scheduling of dep_insn. When not hinted, the
3152 mispredicted branch would hide the cost as well. */
3155 rtx target
= get_branch_target (insn
);
3156 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3161 /* And when returning from a function, let's assume the return values
3162 are completed sooner too. */
3163 if (CALL_P (dep_insn
))
3166 /* Make sure an instruction that loads from the back chain is schedule
3167 away from the return instruction so a hint is more likely to get
3169 if (INSN_CODE (insn
) == CODE_FOR__return
3170 && (set
= single_set (dep_insn
))
3171 && GET_CODE (SET_DEST (set
)) == REG
3172 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3175 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3176 scheduler makes every insn in a block anti-dependent on the final
3177 jump_insn. We adjust here so higher cost insns will get scheduled
3179 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3180 return insn_cost (dep_insn
) - 3;
3185 /* Create a CONST_DOUBLE from a string. */
3187 spu_float_const (const char *string
, enum machine_mode mode
)
3189 REAL_VALUE_TYPE value
;
3190 value
= REAL_VALUE_ATOF (string
, mode
);
3191 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
3195 spu_constant_address_p (rtx x
)
3197 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
3198 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
3199 || GET_CODE (x
) == HIGH
);
3202 static enum spu_immediate
3203 which_immediate_load (HOST_WIDE_INT val
)
3205 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3207 if (val
>= -0x8000 && val
<= 0x7fff)
3209 if (val
>= 0 && val
<= 0x3ffff)
3211 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3213 if ((val
& 0xffff) == 0)
3219 /* Return true when OP can be loaded by one of the il instructions, or
3220 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3222 immediate_load_p (rtx op
, enum machine_mode mode
)
3224 if (CONSTANT_P (op
))
3226 enum immediate_class c
= classify_immediate (op
, mode
);
3227 return c
== IC_IL1
|| c
== IC_IL1s
3228 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.

   ARR is compared against the identity shuffle pattern (byte i == i+16);
   the deviating run must be the insertion pattern for a byte (run 1),
   halfword (2), word (4) or doubleword (8), naturally aligned.

   Fix vs. the original: the inner while tested arr[i+run] BEFORE the
   i+run < 16 bound, reading one byte past the 16-byte array; the
   operands are reordered so the bound is checked first.  */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
        if (!run)
          {
            start = i;
            if (arr[i] == 3)
              run = 1;
            else if (arr[i] == 2 && arr[i + 1] == 3)
              run = 2;
            else if (arr[i] == 0)
              {
                /* Bound check first to avoid reading arr[16].  */
                while (i + run < 16 && arr[i + run] == run)
                  run++;
                if (run != 4 && run != 8)
                  cpat = 0;
              }
            else
              cpat = 0;
            /* The inserted field must be naturally aligned.  */
            if ((i & (run - 1)) != 0)
              cpat = 0;
            i += run;
          }
        else
          /* A second deviating run means this is not a cpat pattern.  */
          cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
        run = 1;
      if (prun)
        *prun = run;
      if (pstart)
        *pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
3282 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3283 it into a register. MODE is only valid when OP is a CONST_INT. */
3284 static enum immediate_class
3285 classify_immediate (rtx op
, enum machine_mode mode
)
3288 unsigned char arr
[16];
3289 int i
, j
, repeated
, fsmbi
, repeat
;
3291 gcc_assert (CONSTANT_P (op
));
3293 if (GET_MODE (op
) != VOIDmode
)
3294 mode
= GET_MODE (op
);
3296 /* A V4SI const_vector with all identical symbols is ok. */
3299 && GET_CODE (op
) == CONST_VECTOR
3300 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3301 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3302 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3303 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3304 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3305 op
= CONST_VECTOR_ELT (op
, 0);
3307 switch (GET_CODE (op
))
3311 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3314 /* We can never know if the resulting address fits in 18 bits and can be
3315 loaded with ila. For now, assume the address will not overflow if
3316 the displacement is "small" (fits 'K' constraint). */
3317 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3319 rtx sym
= XEXP (XEXP (op
, 0), 0);
3320 rtx cst
= XEXP (XEXP (op
, 0), 1);
3322 if (GET_CODE (sym
) == SYMBOL_REF
3323 && GET_CODE (cst
) == CONST_INT
3324 && satisfies_constraint_K (cst
))
3333 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3334 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3335 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3341 constant_to_array (mode
, op
, arr
);
3343 /* Check that each 4-byte slot is identical. */
3345 for (i
= 4; i
< 16; i
+= 4)
3346 for (j
= 0; j
< 4; j
++)
3347 if (arr
[j
] != arr
[i
+ j
])
3352 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3353 val
= trunc_int_for_mode (val
, SImode
);
3355 if (which_immediate_load (val
) != SPU_NONE
)
3359 /* Any mode of 2 bytes or smaller can be loaded with an il
3361 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3365 for (i
= 0; i
< 16 && fsmbi
; i
++)
3366 if (arr
[i
] != 0 && repeat
== 0)
3368 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3371 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3373 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
3386 static enum spu_immediate
3387 which_logical_immediate (HOST_WIDE_INT val
)
3389 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3391 if (val
>= -0x200 && val
<= 0x1ff)
3393 if (val
>= 0 && val
<= 0xffff)
3395 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3397 val
= trunc_int_for_mode (val
, HImode
);
3398 if (val
>= -0x200 && val
<= 0x1ff)
3400 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3402 val
= trunc_int_for_mode (val
, QImode
);
3403 if (val
>= -0x200 && val
<= 0x1ff)
3410 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3413 const_vector_immediate_p (rtx x
)
3416 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
3417 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
3418 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
3419 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
3425 logical_immediate_p (rtx op
, enum machine_mode mode
)
3428 unsigned char arr
[16];
3431 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3432 || GET_CODE (op
) == CONST_VECTOR
);
3434 if (GET_CODE (op
) == CONST_VECTOR
3435 && !const_vector_immediate_p (op
))
3438 if (GET_MODE (op
) != VOIDmode
)
3439 mode
= GET_MODE (op
);
3441 constant_to_array (mode
, op
, arr
);
3443 /* Check that bytes are repeated. */
3444 for (i
= 4; i
< 16; i
+= 4)
3445 for (j
= 0; j
< 4; j
++)
3446 if (arr
[j
] != arr
[i
+ j
])
3449 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3450 val
= trunc_int_for_mode (val
, SImode
);
3452 i
= which_logical_immediate (val
);
3453 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
3457 iohl_immediate_p (rtx op
, enum machine_mode mode
)
3460 unsigned char arr
[16];
3463 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3464 || GET_CODE (op
) == CONST_VECTOR
);
3466 if (GET_CODE (op
) == CONST_VECTOR
3467 && !const_vector_immediate_p (op
))
3470 if (GET_MODE (op
) != VOIDmode
)
3471 mode
= GET_MODE (op
);
3473 constant_to_array (mode
, op
, arr
);
3475 /* Check that bytes are repeated. */
3476 for (i
= 4; i
< 16; i
+= 4)
3477 for (j
= 0; j
< 4; j
++)
3478 if (arr
[j
] != arr
[i
+ j
])
3481 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3482 val
= trunc_int_for_mode (val
, SImode
);
3484 return val
>= 0 && val
<= 0xffff;
3488 arith_immediate_p (rtx op
, enum machine_mode mode
,
3489 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3492 unsigned char arr
[16];
3495 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3496 || GET_CODE (op
) == CONST_VECTOR
);
3498 if (GET_CODE (op
) == CONST_VECTOR
3499 && !const_vector_immediate_p (op
))
3502 if (GET_MODE (op
) != VOIDmode
)
3503 mode
= GET_MODE (op
);
3505 constant_to_array (mode
, op
, arr
);
3507 if (VECTOR_MODE_P (mode
))
3508 mode
= GET_MODE_INNER (mode
);
3510 bytes
= GET_MODE_SIZE (mode
);
3511 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3513 /* Check that bytes are repeated. */
3514 for (i
= bytes
; i
< 16; i
+= bytes
)
3515 for (j
= 0; j
< bytes
; j
++)
3516 if (arr
[j
] != arr
[i
+ j
])
3520 for (j
= 1; j
< bytes
; j
++)
3521 val
= (val
<< 8) | arr
[j
];
3523 val
= trunc_int_for_mode (val
, mode
);
3525 return val
>= low
&& val
<= high
;
3528 /* TRUE when op is an immediate and an exact power of 2, and given that
3529 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3530 all entries must be the same. */
3532 exp2_immediate_p (rtx op
, enum machine_mode mode
, int low
, int high
)
3534 enum machine_mode int_mode
;
3536 unsigned char arr
[16];
3539 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3540 || GET_CODE (op
) == CONST_VECTOR
);
3542 if (GET_CODE (op
) == CONST_VECTOR
3543 && !const_vector_immediate_p (op
))
3546 if (GET_MODE (op
) != VOIDmode
)
3547 mode
= GET_MODE (op
);
3549 constant_to_array (mode
, op
, arr
);
3551 if (VECTOR_MODE_P (mode
))
3552 mode
= GET_MODE_INNER (mode
);
3554 bytes
= GET_MODE_SIZE (mode
);
3555 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3557 /* Check that bytes are repeated. */
3558 for (i
= bytes
; i
< 16; i
+= bytes
)
3559 for (j
= 0; j
< bytes
; j
++)
3560 if (arr
[j
] != arr
[i
+ j
])
3564 for (j
= 1; j
< bytes
; j
++)
3565 val
= (val
<< 8) | arr
[j
];
3567 val
= trunc_int_for_mode (val
, int_mode
);
3569 /* Currently, we only handle SFmode */
3570 gcc_assert (mode
== SFmode
);
3573 int exp
= (val
>> 23) - 127;
3574 return val
> 0 && (val
& 0x007fffff) == 0
3575 && exp
>= low
&& exp
<= high
;
3581 - any 32-bit constant (SImode, SFmode)
3582 - any constant that can be generated with fsmbi (any mode)
3583 - a 64-bit constant where the high and low bits are identical
3585 - a 128-bit constant where the four 32-bit words match. */
3587 spu_legitimate_constant_p (rtx x
)
3589 if (GET_CODE (x
) == HIGH
)
3591 /* V4SI with all identical symbols is valid. */
3593 && GET_MODE (x
) == V4SImode
3594 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3595 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3596 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
3597 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3598 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3599 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
3601 if (GET_CODE (x
) == CONST_VECTOR
3602 && !const_vector_immediate_p (x
))
3607 /* Valid address are:
3608 - symbol_ref, label_ref, const
3610 - reg + const, where either reg or const is 16 byte aligned
3611 - reg + reg, alignment doesn't matter
3612 The alignment matters in the reg+const case because lqd and stqd
3613 ignore the 4 least significant bits of the const. (TODO: It might be
3614 preferable to allow any alignment and fix it up when splitting.) */
3616 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED
,
3617 rtx x
, int reg_ok_strict
)
3619 if (mode
== TImode
&& GET_CODE (x
) == AND
3620 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3621 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) -16)
3623 switch (GET_CODE (x
))
3627 return !TARGET_LARGE_MEM
;
3630 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
3632 rtx sym
= XEXP (XEXP (x
, 0), 0);
3633 rtx cst
= XEXP (XEXP (x
, 0), 1);
3635 /* Accept any symbol_ref + constant, assuming it does not
3636 wrap around the local store addressability limit. */
3637 if (GET_CODE (sym
) == SYMBOL_REF
&& GET_CODE (cst
) == CONST_INT
)
3643 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3647 gcc_assert (GET_CODE (x
) == REG
);
3650 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
3655 rtx op0
= XEXP (x
, 0);
3656 rtx op1
= XEXP (x
, 1);
3657 if (GET_CODE (op0
) == SUBREG
)
3658 op0
= XEXP (op0
, 0);
3659 if (GET_CODE (op1
) == SUBREG
)
3660 op1
= XEXP (op1
, 0);
3661 /* We can't just accept any aligned register because CSE can
3662 change it to a register that is not marked aligned and then
3663 recog will fail. So we only accept frame registers because
3664 they will only be changed to other frame registers. */
3665 if (GET_CODE (op0
) == REG
3666 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3667 && GET_CODE (op1
) == CONST_INT
3668 && INTVAL (op1
) >= -0x2000
3669 && INTVAL (op1
) <= 0x1fff
3670 && (regno_aligned_for_load (REGNO (op0
)) || (INTVAL (op1
) & 15) == 0))
3672 if (GET_CODE (op0
) == REG
3673 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3674 && GET_CODE (op1
) == REG
3675 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
3686 /* When the address is reg + const_int, force the const_int into a
3689 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3690 enum machine_mode mode ATTRIBUTE_UNUSED
)
3693 /* Make sure both operands are registers. */
3694 if (GET_CODE (x
) == PLUS
)
3698 if (ALIGNED_SYMBOL_REF_P (op0
))
3700 op0
= force_reg (Pmode
, op0
);
3701 mark_reg_pointer (op0
, 128);
3703 else if (GET_CODE (op0
) != REG
)
3704 op0
= force_reg (Pmode
, op0
);
3705 if (ALIGNED_SYMBOL_REF_P (op1
))
3707 op1
= force_reg (Pmode
, op1
);
3708 mark_reg_pointer (op1
, 128);
3710 else if (GET_CODE (op1
) != REG
)
3711 op1
= force_reg (Pmode
, op1
);
3712 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3717 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3718 struct attribute_spec.handler. */
3720 spu_handle_fndecl_attribute (tree
* node
,
3722 tree args ATTRIBUTE_UNUSED
,
3723 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3725 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3727 warning (0, "`%s' attribute only applies to functions",
3728 IDENTIFIER_POINTER (name
));
3729 *no_add_attrs
= true;
3735 /* Handle the "vector" attribute. */
3737 spu_handle_vector_attribute (tree
* node
, tree name
,
3738 tree args ATTRIBUTE_UNUSED
,
3739 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3741 tree type
= *node
, result
= NULL_TREE
;
3742 enum machine_mode mode
;
3745 while (POINTER_TYPE_P (type
)
3746 || TREE_CODE (type
) == FUNCTION_TYPE
3747 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3748 type
= TREE_TYPE (type
);
3750 mode
= TYPE_MODE (type
);
3752 unsigned_p
= TYPE_UNSIGNED (type
);
3756 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3759 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3762 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3765 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3768 result
= V4SF_type_node
;
3771 result
= V2DF_type_node
;
3777 /* Propagate qualifiers attached to the element type
3778 onto the vector type. */
3779 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3780 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3782 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3785 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name
));
3787 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
3792 /* Return nonzero if FUNC is a naked function. */
3794 spu_naked_function_p (tree func
)
3798 if (TREE_CODE (func
) != FUNCTION_DECL
)
3801 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3802 return a
!= NULL_TREE
;
3806 spu_initial_elimination_offset (int from
, int to
)
3808 int saved_regs_size
= spu_saved_regs_size ();
3810 if (!current_function_is_leaf
|| crtl
->outgoing_args_size
3811 || get_frame_size () || saved_regs_size
)
3812 sp_offset
= STACK_POINTER_OFFSET
;
3813 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3814 return get_frame_size () + crtl
->outgoing_args_size
+ sp_offset
;
3815 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3816 return get_frame_size ();
3817 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3818 return sp_offset
+ crtl
->outgoing_args_size
3819 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3820 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3821 return get_frame_size () + saved_regs_size
+ sp_offset
;
3827 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3829 enum machine_mode mode
= TYPE_MODE (type
);
3830 int byte_size
= ((mode
== BLKmode
)
3831 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3833 /* Make sure small structs are left justified in a register. */
3834 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3835 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3837 enum machine_mode smode
;
3840 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3841 int n
= byte_size
/ UNITS_PER_WORD
;
3842 v
= rtvec_alloc (nregs
);
3843 for (i
= 0; i
< n
; i
++)
3845 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3846 gen_rtx_REG (TImode
,
3849 GEN_INT (UNITS_PER_WORD
* i
));
3850 byte_size
-= UNITS_PER_WORD
;
3858 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3860 gen_rtx_EXPR_LIST (VOIDmode
,
3861 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3862 GEN_INT (UNITS_PER_WORD
* n
));
3864 return gen_rtx_PARALLEL (mode
, v
);
3866 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3870 spu_function_arg (CUMULATIVE_ARGS cum
,
3871 enum machine_mode mode
,
3872 tree type
, int named ATTRIBUTE_UNUSED
)
3876 if (cum
>= MAX_REGISTER_ARGS
)
3879 byte_size
= ((mode
== BLKmode
)
3880 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3882 /* The ABI does not allow parameters to be passed partially in
3883 reg and partially in stack. */
3884 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3887 /* Make sure small structs are left justified in a register. */
3888 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3889 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3891 enum machine_mode smode
;
3895 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3896 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3897 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
3899 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3902 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
3905 /* Variable sized types are passed by reference. */
3907 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
3908 enum machine_mode mode ATTRIBUTE_UNUSED
,
3909 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3911 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3917 /* Create and return the va_list datatype.
3919 On SPU, va_list is an array type equivalent to
3921 typedef struct __va_list_tag
3923 void *__args __attribute__((__aligned(16)));
3924 void *__skip __attribute__((__aligned(16)));
3928 where __args points to the arg that will be returned by the next
3929 va_arg(), and __skip points to the previous stack frame such that
3930 when __args == __skip we should advance __args by 32 bytes. */
3932 spu_build_builtin_va_list (void)
3934 tree f_args
, f_skip
, record
, type_decl
;
3937 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3940 build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3942 f_args
= build_decl (FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3943 f_skip
= build_decl (FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3945 DECL_FIELD_CONTEXT (f_args
) = record
;
3946 DECL_ALIGN (f_args
) = 128;
3947 DECL_USER_ALIGN (f_args
) = 1;
3949 DECL_FIELD_CONTEXT (f_skip
) = record
;
3950 DECL_ALIGN (f_skip
) = 128;
3951 DECL_USER_ALIGN (f_skip
) = 1;
3953 TREE_CHAIN (record
) = type_decl
;
3954 TYPE_NAME (record
) = type_decl
;
3955 TYPE_FIELDS (record
) = f_args
;
3956 TREE_CHAIN (f_args
) = f_skip
;
3958 /* We know this is being padded and we want it too. It is an internal
3959 type so hide the warnings from the user. */
3961 warn_padded
= false;
3963 layout_type (record
);
3967 /* The correct type is an array type of one element. */
3968 return build_array_type (record
, build_index_type (size_zero_node
));
3971 /* Implement va_start by filling the va_list structure VALIST.
3972 NEXTARG points to the first anonymous stack argument.
3974 The following global variables are used to initialize
3975 the va_list structure:
3978 the CUMULATIVE_ARGS for this function
3980 crtl->args.arg_offset_rtx:
3981 holds the offset of the first anonymous stack argument
3982 (relative to the virtual arg pointer). */
3985 spu_va_start (tree valist
, rtx nextarg
)
3987 tree f_args
, f_skip
;
3990 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3991 f_skip
= TREE_CHAIN (f_args
);
3993 valist
= build_va_arg_indirect_ref (valist
);
3995 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3997 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3999 /* Find the __args area. */
4000 t
= make_tree (TREE_TYPE (args
), nextarg
);
4001 if (crtl
->args
.pretend_args_size
> 0)
4002 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (args
), t
,
4003 size_int (-STACK_POINTER_OFFSET
));
4004 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
4005 TREE_SIDE_EFFECTS (t
) = 1;
4006 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4008 /* Find the __skip area. */
4009 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4010 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (skip
), t
,
4011 size_int (crtl
->args
.pretend_args_size
4012 - STACK_POINTER_OFFSET
));
4013 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
4014 TREE_SIDE_EFFECTS (t
) = 1;
4015 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4018 /* Gimplify va_arg by updating the va_list structure
4019 VALIST as required to retrieve an argument of type
4020 TYPE, and returning that argument.
4022 ret = va_arg(VALIST, TYPE);
4024 generates code equivalent to:
4026 paddedsize = (sizeof(TYPE) + 15) & -16;
4027 if (VALIST.__args + paddedsize > VALIST.__skip
4028 && VALIST.__args <= VALIST.__skip)
4029 addr = VALIST.__skip + 32;
4031 addr = VALIST.__args;
4032 VALIST.__args = addr + paddedsize;
4033 ret = *(TYPE *)addr;
4036 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4037 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4039 tree f_args
, f_skip
;
4041 HOST_WIDE_INT size
, rsize
;
4042 tree paddedsize
, addr
, tmp
;
4043 bool pass_by_reference_p
;
4045 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4046 f_skip
= TREE_CHAIN (f_args
);
4048 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4050 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4052 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4054 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4055 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4057 /* if an object is dynamically sized, a pointer to it is passed
4058 instead of the object itself. */
4059 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4061 if (pass_by_reference_p
)
4062 type
= build_pointer_type (type
);
4063 size
= int_size_in_bytes (type
);
4064 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4066 /* build conditional expression to calculate addr. The expression
4067 will be gimplified later. */
4068 paddedsize
= size_int (rsize
);
4069 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (args
), paddedsize
);
4070 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4071 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4072 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4073 unshare_expr (skip
)));
4075 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4076 build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (skip
),
4077 size_int (32)), unshare_expr (args
));
4079 gimplify_assign (addr
, tmp
, pre_p
);
4081 /* update VALIST.__args */
4082 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
4083 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4085 addr
= fold_convert (build_pointer_type (type
), addr
);
4087 if (pass_by_reference_p
)
4088 addr
= build_va_arg_indirect_ref (addr
);
4090 return build_va_arg_indirect_ref (addr
);
4093 /* Save parameter registers starting with the register that corresponds
4094 to the first unnamed parameters. If the first unnamed parameter is
4095 in the stack then save no registers. Set pretend_args_size to the
4096 amount of space needed to save the registers. */
4098 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
4099 tree type
, int *pretend_size
, int no_rtl
)
4108 /* cum currently points to the last named argument, we want to
4109 start at the next argument. */
4110 FUNCTION_ARG_ADVANCE (ncum
, mode
, type
, 1);
4112 offset
= -STACK_POINTER_OFFSET
;
4113 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4115 tmp
= gen_frame_mem (V4SImode
,
4116 plus_constant (virtual_incoming_args_rtx
,
4118 emit_move_insn (tmp
,
4119 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4122 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
4127 spu_conditional_register_usage (void)
4131 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4132 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4136 /* This is called to decide when we can simplify a load instruction. We
4137 must only return true for registers which we know will always be
4138 aligned. Taking into account that CSE might replace this reg with
4139 another one that has not been marked aligned.
4140 So this is really only true for frame, stack and virtual registers,
4141 which we know are always aligned and should not be adversely effected
4144 regno_aligned_for_load (int regno
)
4146 return regno
== FRAME_POINTER_REGNUM
4147 || (frame_pointer_needed
&& regno
== HARD_FRAME_POINTER_REGNUM
)
4148 || regno
== ARG_POINTER_REGNUM
4149 || regno
== STACK_POINTER_REGNUM
4150 || (regno
>= FIRST_VIRTUAL_REGISTER
4151 && regno
<= LAST_VIRTUAL_REGISTER
);
4154 /* Return TRUE when mem is known to be 16-byte aligned. */
4156 aligned_mem_p (rtx mem
)
4158 if (MEM_ALIGN (mem
) >= 128)
4160 if (GET_MODE_SIZE (GET_MODE (mem
)) >= 16)
4162 if (GET_CODE (XEXP (mem
, 0)) == PLUS
)
4164 rtx p0
= XEXP (XEXP (mem
, 0), 0);
4165 rtx p1
= XEXP (XEXP (mem
, 0), 1);
4166 if (regno_aligned_for_load (REGNO (p0
)))
4168 if (GET_CODE (p1
) == REG
&& regno_aligned_for_load (REGNO (p1
)))
4170 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
4174 else if (GET_CODE (XEXP (mem
, 0)) == REG
)
4176 if (regno_aligned_for_load (REGNO (XEXP (mem
, 0))))
4179 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem
, 0)))
4181 else if (GET_CODE (XEXP (mem
, 0)) == CONST
)
4183 rtx p0
= XEXP (XEXP (XEXP (mem
, 0), 0), 0);
4184 rtx p1
= XEXP (XEXP (XEXP (mem
, 0), 0), 1);
4185 if (GET_CODE (p0
) == SYMBOL_REF
4186 && GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
4192 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4193 into its SYMBOL_REF_FLAGS. */
4195 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
4197 default_encode_section_info (decl
, rtl
, first
);
4199 /* If a variable has a forced alignment to < 16 bytes, mark it with
4200 SYMBOL_FLAG_ALIGN1. */
4201 if (TREE_CODE (decl
) == VAR_DECL
4202 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4203 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4206 /* Return TRUE if we are certain the mem refers to a complete object
4207 which is both 16-byte aligned and padded to a 16-byte boundary. This
4208 would make it safe to store with a single instruction.
4209 We guarantee the alignment and padding for static objects by aligning
4210 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4211 FIXME: We currently cannot guarantee this for objects on the stack
4212 because assign_parm_setup_stack calls assign_stack_local with the
4213 alignment of the parameter mode and in that case the alignment never
4214 gets adjusted by LOCAL_ALIGNMENT. */
4216 store_with_one_insn_p (rtx mem
)
4218 rtx addr
= XEXP (mem
, 0);
4219 if (GET_MODE (mem
) == BLKmode
)
4221 /* Only static objects. */
4222 if (GET_CODE (addr
) == SYMBOL_REF
)
4224 /* We use the associated declaration to make sure the access is
4225 referring to the whole object.
4226 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
4227 if it is necessary. Will there be cases where one exists, and
4228 the other does not? Will there be cases where both exist, but
4229 have different types? */
4230 tree decl
= MEM_EXPR (mem
);
4232 && TREE_CODE (decl
) == VAR_DECL
4233 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4235 decl
= SYMBOL_REF_DECL (addr
);
4237 && TREE_CODE (decl
) == VAR_DECL
4238 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4245 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
4247 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4250 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4252 rtx from
= SUBREG_REG (ops
[1]);
4253 enum machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4255 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4256 && GET_MODE_CLASS (imode
) == MODE_INT
4257 && subreg_lowpart_p (ops
[1]));
4259 if (GET_MODE_SIZE (imode
) < 4)
4261 if (imode
!= GET_MODE (from
))
4262 from
= gen_rtx_SUBREG (imode
, from
, 0);
4264 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4266 enum insn_code icode
= convert_optab_handler (trunc_optab
, mode
, imode
)->insn_code
;
4267 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4270 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4274 /* At least one of the operands needs to be a register. */
4275 if ((reload_in_progress
| reload_completed
) == 0
4276 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4278 rtx temp
= force_reg (mode
, ops
[1]);
4279 emit_move_insn (ops
[0], temp
);
4282 if (reload_in_progress
|| reload_completed
)
4284 if (CONSTANT_P (ops
[1]))
4285 return spu_split_immediate (ops
);
4290 if (GET_CODE (ops
[0]) == MEM
)
4292 if (!spu_valid_move (ops
))
4294 emit_insn (gen_store (ops
[0], ops
[1], gen_reg_rtx (TImode
),
4295 gen_reg_rtx (TImode
)));
4299 else if (GET_CODE (ops
[1]) == MEM
)
4301 if (!spu_valid_move (ops
))
4304 (ops
[0], ops
[1], gen_reg_rtx (TImode
),
4305 gen_reg_rtx (SImode
)));
4309 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4311 if (GET_CODE (ops
[1]) == CONST_INT
)
4313 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4314 if (val
!= INTVAL (ops
[1]))
4316 emit_move_insn (ops
[0], GEN_INT (val
));
4325 spu_split_load (rtx
* ops
)
4327 enum machine_mode mode
= GET_MODE (ops
[0]);
4328 rtx addr
, load
, rot
, mem
, p0
, p1
;
4331 addr
= XEXP (ops
[1], 0);
4335 if (GET_CODE (addr
) == PLUS
)
4338 aligned reg + aligned reg => lqx
4339 aligned reg + unaligned reg => lqx, rotqby
4340 aligned reg + aligned const => lqd
4341 aligned reg + unaligned const => lqd, rotqbyi
4342 unaligned reg + aligned reg => lqx, rotqby
4343 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4344 unaligned reg + aligned const => lqd, rotqby
4345 unaligned reg + unaligned const -> not allowed by legitimate address
4347 p0
= XEXP (addr
, 0);
4348 p1
= XEXP (addr
, 1);
4349 if (REG_P (p0
) && !regno_aligned_for_load (REGNO (p0
)))
4351 if (REG_P (p1
) && !regno_aligned_for_load (REGNO (p1
)))
4353 emit_insn (gen_addsi3 (ops
[3], p0
, p1
));
4361 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4363 rot_amt
= INTVAL (p1
) & 15;
4364 p1
= GEN_INT (INTVAL (p1
) & -16);
4365 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4367 else if (REG_P (p1
) && !regno_aligned_for_load (REGNO (p1
)))
4371 else if (GET_CODE (addr
) == REG
)
4373 if (!regno_aligned_for_load (REGNO (addr
)))
4376 else if (GET_CODE (addr
) == CONST
)
4378 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4379 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4380 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4382 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4384 addr
= gen_rtx_CONST (Pmode
,
4385 gen_rtx_PLUS (Pmode
,
4386 XEXP (XEXP (addr
, 0), 0),
4387 GEN_INT (rot_amt
& -16)));
4389 addr
= XEXP (XEXP (addr
, 0), 0);
4394 else if (GET_CODE (addr
) == CONST_INT
)
4396 rot_amt
= INTVAL (addr
);
4397 addr
= GEN_INT (rot_amt
& -16);
4399 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4402 if (GET_MODE_SIZE (mode
) < 4)
4403 rot_amt
+= GET_MODE_SIZE (mode
) - 4;
4409 emit_insn (gen_addsi3 (ops
[3], rot
, GEN_INT (rot_amt
)));
4416 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4417 mem
= change_address (ops
[1], TImode
, addr
);
4419 emit_insn (gen_movti (load
, mem
));
4422 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4424 emit_insn (gen_rotlti3 (load
, load
, GEN_INT (rot_amt
* 8)));
4426 if (reload_completed
)
4427 emit_move_insn (ops
[0], gen_rtx_REG (GET_MODE (ops
[0]), REGNO (load
)));
4429 emit_insn (gen_spu_convert (ops
[0], load
));
4433 spu_split_store (rtx
* ops
)
4435 enum machine_mode mode
= GET_MODE (ops
[0]);
4438 rtx addr
, p0
, p1
, p1_lo
, smem
;
4442 addr
= XEXP (ops
[0], 0);
4444 if (GET_CODE (addr
) == PLUS
)
4447 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4448 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4449 aligned reg + aligned const => lqd, c?d, shuf, stqx
4450 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4451 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4452 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4453 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4454 unaligned reg + unaligned const -> not allowed by legitimate address
4457 p0
= XEXP (addr
, 0);
4458 p1
= p1_lo
= XEXP (addr
, 1);
4459 if (GET_CODE (p0
) == REG
&& GET_CODE (p1
) == CONST_INT
)
4461 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4462 p1
= GEN_INT (INTVAL (p1
) & -16);
4463 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4466 else if (GET_CODE (addr
) == REG
)
4470 p1
= p1_lo
= const0_rtx
;
4475 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4476 p1
= 0; /* aform doesn't use p1 */
4478 if (ALIGNED_SYMBOL_REF_P (addr
))
4480 else if (GET_CODE (addr
) == CONST
)
4482 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4483 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4484 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4486 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4488 addr
= gen_rtx_CONST (Pmode
,
4489 gen_rtx_PLUS (Pmode
,
4490 XEXP (XEXP (addr
, 0), 0),
4491 GEN_INT (v
& -16)));
4493 addr
= XEXP (XEXP (addr
, 0), 0);
4494 p1_lo
= GEN_INT (v
& 15);
4497 else if (GET_CODE (addr
) == CONST_INT
)
4499 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4500 addr
= GEN_INT (INTVAL (addr
) & -16);
4504 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4506 scalar
= store_with_one_insn_p (ops
[0]);
4509 /* We could copy the flags from the ops[0] MEM to mem here,
4510 We don't because we want this load to be optimized away if
4511 possible, and copying the flags will prevent that in certain
4512 cases, e.g. consider the volatile flag. */
4514 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4515 set_mem_alias_set (lmem
, 0);
4516 emit_insn (gen_movti (reg
, lmem
));
4518 if (!p0
|| regno_aligned_for_load (REGNO (p0
)))
4519 p0
= stack_pointer_rtx
;
4523 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4524 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
4526 else if (reload_completed
)
4528 if (GET_CODE (ops
[1]) == REG
)
4529 emit_move_insn (reg
, gen_rtx_REG (GET_MODE (reg
), REGNO (ops
[1])));
4530 else if (GET_CODE (ops
[1]) == SUBREG
)
4531 emit_move_insn (reg
,
4532 gen_rtx_REG (GET_MODE (reg
),
4533 REGNO (SUBREG_REG (ops
[1]))));
4539 if (GET_CODE (ops
[1]) == REG
)
4540 emit_insn (gen_spu_convert (reg
, ops
[1]));
4541 else if (GET_CODE (ops
[1]) == SUBREG
)
4542 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
4547 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
4548 emit_insn (gen_shlqby_ti
4549 (reg
, reg
, GEN_INT (4 - GET_MODE_SIZE (mode
))));
4551 smem
= change_address (ops
[0], TImode
, addr
);
4552 /* We can't use the previous alias set because the memory has changed
4553 size and can potentially overlap objects of other types. */
4554 set_mem_alias_set (smem
, 0);
4556 emit_insn (gen_movti (smem
, reg
));
4559 /* Return TRUE if X is MEM which is a struct member reference
4560 and the member can safely be loaded and stored with a single
4561 instruction because it is padded. */
4563 mem_is_padded_component_ref (rtx x
)
4565 tree t
= MEM_EXPR (x
);
4567 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
4569 t
= TREE_OPERAND (t
, 1);
4570 if (!t
|| TREE_CODE (t
) != FIELD_DECL
4571 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
4573 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4574 r
= DECL_FIELD_CONTEXT (t
);
4575 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
4577 /* Make sure they are the same mode */
4578 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
4580 /* If there are no following fields then the field alignment assures
4581 the structure is padded to the alignment which means this field is
4583 if (TREE_CHAIN (t
) == 0)
4585 /* If the following field is also aligned then this field will be
4588 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
4593 /* Parse the -mfixed-range= option string. */
4595 fix_range (const char *const_str
)
4598 char *str
, *dash
, *comma
;
4600 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
4601 REG2 are either register names or register numbers. The effect
4602 of this option is to mark the registers in the range from REG1 to
4603 REG2 as ``fixed'' so they won't be used by the compiler. */
4605 i
= strlen (const_str
);
4606 str
= (char *) alloca (i
+ 1);
4607 memcpy (str
, const_str
, i
+ 1);
4611 dash
= strchr (str
, '-');
4614 warning (0, "value of -mfixed-range must have form REG1-REG2");
4618 comma
= strchr (dash
+ 1, ',');
4622 first
= decode_reg_name (str
);
4625 warning (0, "unknown register name: %s", str
);
4629 last
= decode_reg_name (dash
+ 1);
4632 warning (0, "unknown register name: %s", dash
+ 1);
4640 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
4644 for (i
= first
; i
<= last
; ++i
)
4645 fixed_regs
[i
] = call_used_regs
[i
] = 1;
4656 spu_valid_move (rtx
* ops
)
4658 enum machine_mode mode
= GET_MODE (ops
[0]);
4659 if (!register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4662 /* init_expr_once tries to recog against load and store insns to set
4663 the direct_load[] and direct_store[] arrays. We always want to
4664 consider those loads and stores valid. init_expr_once is called in
4665 the context of a dummy function which does not have a decl. */
4666 if (cfun
->decl
== 0)
4669 /* Don't allows loads/stores which would require more than 1 insn.
4670 During and after reload we assume loads and stores only take 1
4672 if (GET_MODE_SIZE (mode
) < 16 && !reload_in_progress
&& !reload_completed
)
4674 if (GET_CODE (ops
[0]) == MEM
4675 && (GET_MODE_SIZE (mode
) < 4
4676 || !(store_with_one_insn_p (ops
[0])
4677 || mem_is_padded_component_ref (ops
[0]))))
4679 if (GET_CODE (ops
[1]) == MEM
4680 && (GET_MODE_SIZE (mode
) < 4 || !aligned_mem_p (ops
[1])))
4686 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4687 can be generated using the fsmbi instruction. */
4689 fsmbi_const_p (rtx x
)
4693 /* We can always choose TImode for CONST_INT because the high bits
4694 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4695 enum immediate_class c
= classify_immediate (x
, TImode
);
4696 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
4701 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4702 can be generated using the cbd, chd, cwd or cdd instruction. */
4704 cpat_const_p (rtx x
, enum machine_mode mode
)
4708 enum immediate_class c
= classify_immediate (x
, mode
);
4709 return c
== IC_CPAT
;
4715 gen_cpat_const (rtx
* ops
)
4717 unsigned char dst
[16];
4718 int i
, offset
, shift
, isize
;
4719 if (GET_CODE (ops
[3]) != CONST_INT
4720 || GET_CODE (ops
[2]) != CONST_INT
4721 || (GET_CODE (ops
[1]) != CONST_INT
4722 && GET_CODE (ops
[1]) != REG
))
4724 if (GET_CODE (ops
[1]) == REG
4725 && (!REG_POINTER (ops
[1])
4726 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
4729 for (i
= 0; i
< 16; i
++)
4731 isize
= INTVAL (ops
[3]);
4734 else if (isize
== 2)
4738 offset
= (INTVAL (ops
[2]) +
4739 (GET_CODE (ops
[1]) ==
4740 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
4741 for (i
= 0; i
< isize
; i
++)
4742 dst
[offset
+ i
] = i
+ shift
;
4743 return array_to_constant (TImode
, dst
);
4746 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4747 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4748 than 16 bytes, the value is repeated across the rest of the array. */
4750 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
4755 memset (arr
, 0, 16);
4756 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
4757 if (GET_CODE (x
) == CONST_INT
4758 || (GET_CODE (x
) == CONST_DOUBLE
4759 && (mode
== SFmode
|| mode
== DFmode
)))
4761 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
4763 if (GET_CODE (x
) == CONST_DOUBLE
)
4764 val
= const_double_to_hwint (x
);
4767 first
= GET_MODE_SIZE (mode
) - 1;
4768 for (i
= first
; i
>= 0; i
--)
4770 arr
[i
] = val
& 0xff;
4773 /* Splat the constant across the whole array. */
4774 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
4777 j
= (j
== first
) ? 0 : j
+ 1;
4780 else if (GET_CODE (x
) == CONST_DOUBLE
)
4782 val
= CONST_DOUBLE_LOW (x
);
4783 for (i
= 15; i
>= 8; i
--)
4785 arr
[i
] = val
& 0xff;
4788 val
= CONST_DOUBLE_HIGH (x
);
4789 for (i
= 7; i
>= 0; i
--)
4791 arr
[i
] = val
& 0xff;
4795 else if (GET_CODE (x
) == CONST_VECTOR
)
4799 mode
= GET_MODE_INNER (mode
);
4800 units
= CONST_VECTOR_NUNITS (x
);
4801 for (i
= 0; i
< units
; i
++)
4803 elt
= CONST_VECTOR_ELT (x
, i
);
4804 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
4806 if (GET_CODE (elt
) == CONST_DOUBLE
)
4807 val
= const_double_to_hwint (elt
);
4810 first
= GET_MODE_SIZE (mode
) - 1;
4811 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
4813 for (j
= first
; j
>= 0; j
--)
4815 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
4825 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4826 smaller than 16 bytes, use the bytes that would represent that value
4827 in a register, e.g., for QImode return the value of arr[3]. */
4829 array_to_constant (enum machine_mode mode
, unsigned char arr
[16])
4831 enum machine_mode inner_mode
;
4833 int units
, size
, i
, j
, k
;
4836 if (GET_MODE_CLASS (mode
) == MODE_INT
4837 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
4839 j
= GET_MODE_SIZE (mode
);
4840 i
= j
< 4 ? 4 - j
: 0;
4841 for (val
= 0; i
< j
; i
++)
4842 val
= (val
<< 8) | arr
[i
];
4843 val
= trunc_int_for_mode (val
, mode
);
4844 return GEN_INT (val
);
4850 for (i
= high
= 0; i
< 8; i
++)
4851 high
= (high
<< 8) | arr
[i
];
4852 for (i
= 8, val
= 0; i
< 16; i
++)
4853 val
= (val
<< 8) | arr
[i
];
4854 return immed_double_const (val
, high
, TImode
);
4858 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
4859 val
= trunc_int_for_mode (val
, SImode
);
4860 return hwint_to_const_double (SFmode
, val
);
4864 for (i
= 0, val
= 0; i
< 8; i
++)
4865 val
= (val
<< 8) | arr
[i
];
4866 return hwint_to_const_double (DFmode
, val
);
4869 if (!VECTOR_MODE_P (mode
))
4872 units
= GET_MODE_NUNITS (mode
);
4873 size
= GET_MODE_UNIT_SIZE (mode
);
4874 inner_mode
= GET_MODE_INNER (mode
);
4875 v
= rtvec_alloc (units
);
4877 for (k
= i
= 0; i
< units
; ++i
)
4880 for (j
= 0; j
< size
; j
++, k
++)
4881 val
= (val
<< 8) | arr
[k
];
4883 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
4884 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
4886 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
4891 return gen_rtx_CONST_VECTOR (mode
, v
);
4895 reloc_diagnostic (rtx x
)
4897 tree loc_decl
, decl
= 0;
4899 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
4902 if (GET_CODE (x
) == SYMBOL_REF
)
4903 decl
= SYMBOL_REF_DECL (x
);
4904 else if (GET_CODE (x
) == CONST
4905 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
4906 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
4908 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4909 if (decl
&& !DECL_P (decl
))
4912 /* We use last_assemble_variable_decl to get line information. It's
4913 not always going to be right and might not even be close, but will
4914 be right for the more common cases. */
4915 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
4918 loc_decl
= last_assemble_variable_decl
;
4920 /* The decl could be a string constant. */
4921 if (decl
&& DECL_P (decl
))
4922 msg
= "%Jcreating run-time relocation for %qD";
4924 msg
= "creating run-time relocation";
4926 if (TARGET_WARN_RELOC
)
4927 warning (0, msg
, loc_decl
, decl
);
4929 error (msg
, loc_decl
, decl
);
4932 /* Hook into assemble_integer so we can generate an error for run-time
4933 relocations. The SPU ABI disallows them. */
4935 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
4937 /* By default run-time relocations aren't supported, but we allow them
4938 in case users support it in their own run-time loader. And we provide
4939 a warning for those users that don't. */
4940 if ((GET_CODE (x
) == SYMBOL_REF
)
4941 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
4942 reloc_diagnostic (x
);
4944 return default_assemble_integer (x
, size
, aligned_p
);
/* Emit a ".global NAME" directive to FILE.  */
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  /* Terminate the directive — the newline was lost in the garbled text.  */
  fputs ("\n", file);
}
4956 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
,
4957 bool speed ATTRIBUTE_UNUSED
)
4959 enum machine_mode mode
= GET_MODE (x
);
4960 int cost
= COSTS_N_INSNS (2);
4962 /* Folding to a CONST_VECTOR will use extra space but there might
4963 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4964 only if it allows us to fold away multiple insns. Changing the cost
4965 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4966 because this cost will only be compared against a single insn.
4967 if (code == CONST_VECTOR)
4968 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4971 /* Use defaults for float operations. Not accurate but good enough. */
4974 *total
= COSTS_N_INSNS (13);
4979 *total
= COSTS_N_INSNS (6);
4985 if (satisfies_constraint_K (x
))
4987 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
4988 *total
= COSTS_N_INSNS (1);
4990 *total
= COSTS_N_INSNS (3);
4994 *total
= COSTS_N_INSNS (3);
4999 *total
= COSTS_N_INSNS (0);
5003 *total
= COSTS_N_INSNS (5);
5007 case FLOAT_TRUNCATE
:
5009 case UNSIGNED_FLOAT
:
5012 *total
= COSTS_N_INSNS (7);
5018 *total
= COSTS_N_INSNS (9);
5025 GET_CODE (XEXP (x
, 0)) ==
5026 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5027 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5029 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5031 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5032 cost
= COSTS_N_INSNS (14);
5033 if ((val
& 0xffff) == 0)
5034 cost
= COSTS_N_INSNS (9);
5035 else if (val
> 0 && val
< 0x10000)
5036 cost
= COSTS_N_INSNS (11);
5045 *total
= COSTS_N_INSNS (20);
5052 *total
= COSTS_N_INSNS (4);
5055 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5056 *total
= COSTS_N_INSNS (0);
5058 *total
= COSTS_N_INSNS (4);
5061 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5062 if (GET_MODE_CLASS (mode
) == MODE_INT
5063 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5064 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5065 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5070 static enum machine_mode
5071 spu_unwind_word_mode (void)
5076 /* Decide whether we can make a sibling call to a function. DECL is the
5077 declaration of the function being targeted by the call and EXP is the
5078 CALL_EXPR representing the call. */
5080 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5082 return decl
&& !TARGET_LARGE_MEM
;
5085 /* We need to correctly update the back chain pointer and the Available
5086 Stack Size (which is in the second slot of the sp register.) */
5088 spu_allocate_stack (rtx op0
, rtx op1
)
5091 rtx chain
= gen_reg_rtx (V4SImode
);
5092 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5093 rtx sp
= gen_reg_rtx (V4SImode
);
5094 rtx splatted
= gen_reg_rtx (V4SImode
);
5095 rtx pat
= gen_reg_rtx (TImode
);
5097 /* copy the back chain so we can save it back again. */
5098 emit_move_insn (chain
, stack_bot
);
5100 op1
= force_reg (SImode
, op1
);
5102 v
= 0x1020300010203ll
;
5103 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5104 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5106 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5107 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
5109 if (flag_stack_check
)
5111 rtx avail
= gen_reg_rtx(SImode
);
5112 rtx result
= gen_reg_rtx(SImode
);
5113 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
5114 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5115 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5118 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
5120 emit_move_insn (stack_bot
, chain
);
5122 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
5126 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5128 static unsigned char arr
[16] =
5129 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5130 rtx temp
= gen_reg_rtx (SImode
);
5131 rtx temp2
= gen_reg_rtx (SImode
);
5132 rtx temp3
= gen_reg_rtx (V4SImode
);
5133 rtx temp4
= gen_reg_rtx (V4SImode
);
5134 rtx pat
= gen_reg_rtx (TImode
);
5135 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5137 /* Restore the backchain from the first word, sp from the second. */
5138 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5139 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5141 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5143 /* Compute Available Stack Size for sp */
5144 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5145 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5147 /* Compute Available Stack Size for back chain */
5148 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5149 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5150 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
5152 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5153 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
5157 spu_init_libfuncs (void)
5159 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
5160 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
5161 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
5162 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
5163 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
5164 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
5165 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
5166 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
5167 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
5168 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
5169 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
5171 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
5172 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
5174 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
5175 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
5176 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
5177 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
5178 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
5179 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
5182 /* Make a subreg, stripping any existing subreg. We could possibly just
5183 call simplify_subreg, but in this case we know what we want. */
5185 spu_gen_subreg (enum machine_mode mode
, rtx x
)
5187 if (GET_CODE (x
) == SUBREG
)
5189 if (GET_MODE (x
) == mode
)
5191 return gen_rtx_SUBREG (mode
, x
, 0);
5195 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5197 return (TYPE_MODE (type
) == BLKmode
5199 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5200 || int_size_in_bytes (type
) >
5201 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
5204 /* Create the built-in types and functions */
5206 enum spu_function_code
5208 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5209 #include "spu-builtins.def"
5214 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5216 struct spu_builtin_description spu_builtins
[] = {
5217 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5218 {fcode, icode, name, type, params, NULL_TREE},
5219 #include "spu-builtins.def"
5224 spu_init_builtins (void)
5226 struct spu_builtin_description
*d
;
5229 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5230 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5231 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5232 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5233 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5234 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5236 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5237 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5238 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5239 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5241 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5243 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5244 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5245 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5246 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5247 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5248 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5249 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5250 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5251 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5252 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5253 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5254 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5256 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5257 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5258 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5259 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5260 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5261 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5262 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5263 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5265 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5266 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5268 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5270 spu_builtin_types
[SPU_BTI_PTR
] =
5271 build_pointer_type (build_qualified_type
5273 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5275 /* For each builtin we build a new prototype. The tree code will make
5276 sure nodes are shared. */
5277 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5280 char name
[64]; /* build_function will make a copy. */
5286 /* Find last parm. */
5287 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5292 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5294 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5296 sprintf (name
, "__builtin_%s", d
->name
);
5298 add_builtin_function (name
, p
, END_BUILTINS
+ i
, BUILT_IN_MD
,
5300 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5301 TREE_READONLY (d
->fndecl
) = 1;
5303 /* These builtins don't throw. */
5304 TREE_NOTHROW (d
->fndecl
) = 1;
5309 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5311 static unsigned char arr
[16] =
5312 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5314 rtx temp
= gen_reg_rtx (Pmode
);
5315 rtx temp2
= gen_reg_rtx (V4SImode
);
5316 rtx temp3
= gen_reg_rtx (V4SImode
);
5317 rtx pat
= gen_reg_rtx (TImode
);
5318 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5320 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5322 /* Restore the sp. */
5323 emit_move_insn (temp
, op1
);
5324 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5326 /* Compute available stack size for sp. */
5327 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5328 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5330 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5331 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
5335 spu_safe_dma (HOST_WIDE_INT channel
)
5337 return TARGET_SAFE_DMA
&& channel
>= 21 && channel
<= 27;
5341 spu_builtin_splats (rtx ops
[])
5343 enum machine_mode mode
= GET_MODE (ops
[0]);
5344 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5346 unsigned char arr
[16];
5347 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5348 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5352 rtx reg
= gen_reg_rtx (TImode
);
5354 if (GET_CODE (ops
[1]) != REG
5355 && GET_CODE (ops
[1]) != SUBREG
)
5356 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5362 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5368 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5373 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5378 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5384 emit_move_insn (reg
, shuf
);
5385 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
5390 spu_builtin_extract (rtx ops
[])
5392 enum machine_mode mode
;
5395 mode
= GET_MODE (ops
[1]);
5397 if (GET_CODE (ops
[2]) == CONST_INT
)
5402 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5405 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5408 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5411 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5414 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5417 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5425 from
= spu_gen_subreg (TImode
, ops
[1]);
5426 rot
= gen_reg_rtx (TImode
);
5427 tmp
= gen_reg_rtx (SImode
);
5432 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5435 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5436 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5440 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5444 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5449 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5451 emit_insn (gen_spu_convert (ops
[0], rot
));
5455 spu_builtin_insert (rtx ops
[])
5457 enum machine_mode mode
= GET_MODE (ops
[0]);
5458 enum machine_mode imode
= GET_MODE_INNER (mode
);
5459 rtx mask
= gen_reg_rtx (TImode
);
5462 if (GET_CODE (ops
[3]) == CONST_INT
)
5463 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5466 offset
= gen_reg_rtx (SImode
);
5467 emit_insn (gen_mulsi3
5468 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5471 (mask
, stack_pointer_rtx
, offset
,
5472 GEN_INT (GET_MODE_SIZE (imode
))));
5473 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5477 spu_builtin_promote (rtx ops
[])
5479 enum machine_mode mode
, imode
;
5480 rtx rot
, from
, offset
;
5483 mode
= GET_MODE (ops
[0]);
5484 imode
= GET_MODE_INNER (mode
);
5486 from
= gen_reg_rtx (TImode
);
5487 rot
= spu_gen_subreg (TImode
, ops
[0]);
5489 emit_insn (gen_spu_convert (from
, ops
[1]));
5491 if (GET_CODE (ops
[2]) == CONST_INT
)
5493 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5494 if (GET_MODE_SIZE (imode
) < 4)
5495 pos
+= 4 - GET_MODE_SIZE (imode
);
5496 offset
= GEN_INT (pos
& 15);
5500 offset
= gen_reg_rtx (SImode
);
5504 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5507 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5508 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5512 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5513 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5517 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5523 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5527 spu_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
5529 rtx shuf
= gen_reg_rtx (V4SImode
);
5530 rtx insn
= gen_reg_rtx (V4SImode
);
5535 fnaddr
= force_reg (SImode
, fnaddr
);
5536 cxt
= force_reg (SImode
, cxt
);
5538 if (TARGET_LARGE_MEM
)
5540 rtx rotl
= gen_reg_rtx (V4SImode
);
5541 rtx mask
= gen_reg_rtx (V4SImode
);
5542 rtx bi
= gen_reg_rtx (SImode
);
5543 unsigned char shufa
[16] = {
5544 2, 3, 0, 1, 18, 19, 16, 17,
5545 0, 1, 2, 3, 16, 17, 18, 19
5547 unsigned char insna
[16] = {
5549 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5551 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5554 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5555 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5557 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
5558 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
5559 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
5560 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
5562 mem
= memory_address (Pmode
, tramp
);
5563 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
5565 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
5566 mem
= memory_address (Pmode
, plus_constant (tramp
, 16));
5567 emit_move_insn (gen_rtx_MEM (Pmode
, mem
), bi
);
5571 rtx scxt
= gen_reg_rtx (SImode
);
5572 rtx sfnaddr
= gen_reg_rtx (SImode
);
5573 unsigned char insna
[16] = {
5574 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
5580 shufc
= gen_reg_rtx (TImode
);
5581 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
5583 /* By or'ing all of cxt with the ila opcode we are assuming cxt
5584 fits 18 bits and the last 4 are zeros. This will be true if
5585 the stack pointer is initialized to 0x3fff0 at program start,
5586 otherwise the ila instruction will be garbage. */
5588 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
5589 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
5591 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
5592 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
5593 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
5595 mem
= memory_address (Pmode
, tramp
);
5596 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
5599 emit_insn (gen_sync ());
5603 spu_expand_sign_extend (rtx ops
[])
5605 unsigned char arr
[16];
5606 rtx pat
= gen_reg_rtx (TImode
);
5609 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
5610 if (GET_MODE (ops
[1]) == QImode
)
5612 sign
= gen_reg_rtx (HImode
);
5613 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
5614 for (i
= 0; i
< 16; i
++)
5620 for (i
= 0; i
< 16; i
++)
5622 switch (GET_MODE (ops
[1]))
5625 sign
= gen_reg_rtx (SImode
);
5626 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
5628 arr
[last
- 1] = 0x02;
5631 sign
= gen_reg_rtx (SImode
);
5632 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
5633 for (i
= 0; i
< 4; i
++)
5634 arr
[last
- i
] = 3 - i
;
5637 sign
= gen_reg_rtx (SImode
);
5638 c
= gen_reg_rtx (SImode
);
5639 emit_insn (gen_spu_convert (c
, ops
[1]));
5640 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
5641 for (i
= 0; i
< 8; i
++)
5642 arr
[last
- i
] = 7 - i
;
5648 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5649 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
5652 /* expand vector initialization. If there are any constant parts,
5653 load constant parts first. Then load any non-constant parts. */
5655 spu_expand_vector_init (rtx target
, rtx vals
)
5657 enum machine_mode mode
= GET_MODE (target
);
5658 int n_elts
= GET_MODE_NUNITS (mode
);
5660 bool all_same
= true;
5661 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
5664 first
= XVECEXP (vals
, 0, 0);
5665 for (i
= 0; i
< n_elts
; ++i
)
5667 x
= XVECEXP (vals
, 0, i
);
5668 if (!(CONST_INT_P (x
)
5669 || GET_CODE (x
) == CONST_DOUBLE
5670 || GET_CODE (x
) == CONST_FIXED
))
5674 if (first_constant
== NULL_RTX
)
5677 if (i
> 0 && !rtx_equal_p (x
, first
))
5681 /* if all elements are the same, use splats to repeat elements */
5684 if (!CONSTANT_P (first
)
5685 && !register_operand (first
, GET_MODE (x
)))
5686 first
= force_reg (GET_MODE (first
), first
);
5687 emit_insn (gen_spu_splats (target
, first
));
5691 /* load constant parts */
5692 if (n_var
!= n_elts
)
5696 emit_move_insn (target
,
5697 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
5701 rtx constant_parts_rtx
= copy_rtx (vals
);
5703 gcc_assert (first_constant
!= NULL_RTX
);
5704 /* fill empty slots with the first constant, this increases
5705 our chance of using splats in the recursive call below. */
5706 for (i
= 0; i
< n_elts
; ++i
)
5708 x
= XVECEXP (constant_parts_rtx
, 0, i
);
5709 if (!(CONST_INT_P (x
)
5710 || GET_CODE (x
) == CONST_DOUBLE
5711 || GET_CODE (x
) == CONST_FIXED
))
5712 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
5715 spu_expand_vector_init (target
, constant_parts_rtx
);
5719 /* load variable parts */
5722 rtx insert_operands
[4];
5724 insert_operands
[0] = target
;
5725 insert_operands
[2] = target
;
5726 for (i
= 0; i
< n_elts
; ++i
)
5728 x
= XVECEXP (vals
, 0, i
);
5729 if (!(CONST_INT_P (x
)
5730 || GET_CODE (x
) == CONST_DOUBLE
5731 || GET_CODE (x
) == CONST_FIXED
))
5733 if (!register_operand (x
, GET_MODE (x
)))
5734 x
= force_reg (GET_MODE (x
), x
);
5735 insert_operands
[1] = x
;
5736 insert_operands
[3] = GEN_INT (i
);
5737 spu_builtin_insert (insert_operands
);
5743 /* Return insn index for the vector compare instruction for given CODE,
5744 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
5747 get_vec_cmp_insn (enum rtx_code code
,
5748 enum machine_mode dest_mode
,
5749 enum machine_mode op_mode
)
5755 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5756 return CODE_FOR_ceq_v16qi
;
5757 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5758 return CODE_FOR_ceq_v8hi
;
5759 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5760 return CODE_FOR_ceq_v4si
;
5761 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5762 return CODE_FOR_ceq_v4sf
;
5763 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5764 return CODE_FOR_ceq_v2df
;
5767 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5768 return CODE_FOR_cgt_v16qi
;
5769 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5770 return CODE_FOR_cgt_v8hi
;
5771 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5772 return CODE_FOR_cgt_v4si
;
5773 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5774 return CODE_FOR_cgt_v4sf
;
5775 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5776 return CODE_FOR_cgt_v2df
;
5779 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5780 return CODE_FOR_clgt_v16qi
;
5781 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5782 return CODE_FOR_clgt_v8hi
;
5783 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5784 return CODE_FOR_clgt_v4si
;
5792 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5793 DMODE is expected destination mode. This is a recursive function. */
5796 spu_emit_vector_compare (enum rtx_code rcode
,
5798 enum machine_mode dmode
)
5802 enum machine_mode dest_mode
;
5803 enum machine_mode op_mode
= GET_MODE (op1
);
5805 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
5807 /* Floating point vector compare instructions uses destination V4SImode.
5808 Double floating point vector compare instructions uses destination V2DImode.
5809 Move destination to appropriate mode later. */
5810 if (dmode
== V4SFmode
)
5811 dest_mode
= V4SImode
;
5812 else if (dmode
== V2DFmode
)
5813 dest_mode
= V2DImode
;
5817 mask
= gen_reg_rtx (dest_mode
);
5818 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5820 if (vec_cmp_insn
== -1)
5822 bool swap_operands
= false;
5823 bool try_again
= false;
5828 swap_operands
= true;
5833 swap_operands
= true;
5837 /* Treat A != B as ~(A==B). */
5839 enum insn_code nor_code
;
5840 rtx eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5841 nor_code
= optab_handler (one_cmpl_optab
, (int)dest_mode
)->insn_code
;
5842 gcc_assert (nor_code
!= CODE_FOR_nothing
);
5843 emit_insn (GEN_FCN (nor_code
) (mask
, eq_rtx
));
5844 if (dmode
!= dest_mode
)
5846 rtx temp
= gen_reg_rtx (dest_mode
);
5847 convert_move (temp
, mask
, 0);
5857 /* Try GT/GTU/LT/LTU OR EQ */
5860 enum insn_code ior_code
;
5861 enum rtx_code new_code
;
5865 case GE
: new_code
= GT
; break;
5866 case GEU
: new_code
= GTU
; break;
5867 case LE
: new_code
= LT
; break;
5868 case LEU
: new_code
= LTU
; break;
5873 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
5874 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5876 ior_code
= optab_handler (ior_optab
, (int)dest_mode
)->insn_code
;
5877 gcc_assert (ior_code
!= CODE_FOR_nothing
);
5878 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
5879 if (dmode
!= dest_mode
)
5881 rtx temp
= gen_reg_rtx (dest_mode
);
5882 convert_move (temp
, mask
, 0);
5892 /* You only get two chances. */
5894 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5896 gcc_assert (vec_cmp_insn
!= -1);
5907 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
5908 if (dmode
!= dest_mode
)
5910 rtx temp
= gen_reg_rtx (dest_mode
);
5911 convert_move (temp
, mask
, 0);
5918 /* Emit vector conditional expression.
5919 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5920 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5923 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
5924 rtx cond
, rtx cc_op0
, rtx cc_op1
)
5926 enum machine_mode dest_mode
= GET_MODE (dest
);
5927 enum rtx_code rcode
= GET_CODE (cond
);
5930 /* Get the vector mask for the given relational operations. */
5931 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
5933 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
5939 spu_force_reg (enum machine_mode mode
, rtx op
)
5942 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
5944 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
5945 || GET_MODE (op
) == BLKmode
)
5946 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
5950 r
= force_reg (GET_MODE (op
), op
);
5951 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
5953 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
5958 x
= gen_reg_rtx (mode
);
5959 emit_insn (gen_spu_convert (x
, r
));
5964 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
5966 HOST_WIDE_INT v
= 0;
5968 /* Check the range of immediate operands. */
5969 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
5971 int range
= p
- SPU_BTI_7
;
5973 if (!CONSTANT_P (op
))
5974 error ("%s expects an integer literal in the range [%d, %d].",
5976 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
5978 if (GET_CODE (op
) == CONST
5979 && (GET_CODE (XEXP (op
, 0)) == PLUS
5980 || GET_CODE (XEXP (op
, 0)) == MINUS
))
5982 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
5983 op
= XEXP (XEXP (op
, 0), 0);
5985 else if (GET_CODE (op
) == CONST_INT
)
5987 else if (GET_CODE (op
) == CONST_VECTOR
5988 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
5989 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
5991 /* The default for v is 0 which is valid in every range. */
5992 if (v
< spu_builtin_range
[range
].low
5993 || v
> spu_builtin_range
[range
].high
)
5994 error ("%s expects an integer literal in the range [%d, %d]. ("
5995 HOST_WIDE_INT_PRINT_DEC
")",
5997 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6006 /* This is only used in lqa, and stqa. Even though the insns
6007 encode 16 bits of the address (all but the 2 least
6008 significant), only 14 bits are used because it is masked to
6009 be 16 byte aligned. */
6013 /* This is used for lqr and stqr. */
6020 if (GET_CODE (op
) == LABEL_REF
6021 || (GET_CODE (op
) == SYMBOL_REF
6022 && SYMBOL_REF_FUNCTION_P (op
))
6023 || (v
& ((1 << lsbits
) - 1)) != 0)
6024 warning (0, "%d least significant bits of %s are ignored.", lsbits
,
6031 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6032 rtx target
, rtx ops
[])
6034 enum insn_code icode
= (enum insn_code
) d
->icode
;
6037 /* Expand the arguments into rtl. */
6039 if (d
->parm
[0] != SPU_BTI_VOID
)
6042 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6044 tree arg
= CALL_EXPR_ARG (exp
, a
);
6047 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6050 /* The insn pattern may have additional operands (SCRATCH).
6051 Return the number of actual non-SCRATCH operands. */
6052 gcc_assert (i
<= insn_data
[icode
].n_operands
);
6057 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6058 tree exp
, rtx target
)
6062 enum insn_code icode
= (enum insn_code
) d
->icode
;
6063 enum machine_mode mode
, tmode
;
6068 /* Set up ops[] with values from arglist. */
6069 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6071 /* Handle the target operand which must be operand 0. */
6073 if (d
->parm
[0] != SPU_BTI_VOID
)
6076 /* We prefer the mode specified for the match_operand otherwise
6077 use the mode from the builtin function prototype. */
6078 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6079 if (tmode
== VOIDmode
)
6080 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6082 /* Try to use target because not using it can lead to extra copies
6083 and when we are using all of the registers extra copies leads
6085 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6088 target
= ops
[0] = gen_reg_rtx (tmode
);
6090 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6096 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6098 enum machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6103 arg
= CALL_EXPR_ARG (exp
, 0);
6104 gcc_assert (TREE_CODE (TREE_TYPE (arg
)) == POINTER_TYPE
);
6105 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6106 addr
= memory_address (mode
, op
);
6109 op
= gen_reg_rtx (GET_MODE (addr
));
6110 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6111 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6112 op
= gen_rtx_MEM (mode
, op
);
6114 pat
= GEN_FCN (icode
) (target
, op
);
6121 /* Ignore align_hint, but still expand it's args in case they have
6123 if (icode
== CODE_FOR_spu_align_hint
)
6126 /* Handle the rest of the operands. */
6127 for (p
= 1; i
< n_operands
; i
++, p
++)
6129 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6130 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6132 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6134 /* mode can be VOIDmode here for labels */
6136 /* For specific intrinsics with an immediate operand, e.g.,
6137 si_ai(), we sometimes need to convert the scalar argument to a
6138 vector argument by splatting the scalar. */
6139 if (VECTOR_MODE_P (mode
)
6140 && (GET_CODE (ops
[i
]) == CONST_INT
6141 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6142 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6144 if (GET_CODE (ops
[i
]) == CONST_INT
)
6145 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6148 rtx reg
= gen_reg_rtx (mode
);
6149 enum machine_mode imode
= GET_MODE_INNER (mode
);
6150 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6151 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6152 if (imode
!= GET_MODE (ops
[i
]))
6153 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6154 TYPE_UNSIGNED (spu_builtin_types
6156 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6161 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6163 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6164 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6170 pat
= GEN_FCN (icode
) (0);
6173 pat
= GEN_FCN (icode
) (ops
[0]);
6176 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6179 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6182 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6185 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6188 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6197 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6198 emit_call_insn (pat
);
6199 else if (d
->type
== B_JUMP
)
6201 emit_jump_insn (pat
);
6207 return_type
= spu_builtin_types
[d
->parm
[0]];
6208 if (d
->parm
[0] != SPU_BTI_VOID
6209 && GET_MODE (target
) != TYPE_MODE (return_type
))
6211 /* target is the return value. It should always be the mode of
6212 the builtin function prototype. */
6213 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6220 spu_expand_builtin (tree exp
,
6222 rtx subtarget ATTRIBUTE_UNUSED
,
6223 enum machine_mode mode ATTRIBUTE_UNUSED
,
6224 int ignore ATTRIBUTE_UNUSED
)
6226 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6227 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
) - END_BUILTINS
;
6228 struct spu_builtin_description
*d
;
6230 if (fcode
< NUM_SPU_BUILTINS
)
6232 d
= &spu_builtins
[fcode
];
6234 return spu_expand_builtin_1 (d
, exp
, target
);
6239 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6241 spu_builtin_mul_widen_even (tree type
)
6243 switch (TYPE_MODE (type
))
6246 if (TYPE_UNSIGNED (type
))
6247 return spu_builtins
[SPU_MULE_0
].fndecl
;
6249 return spu_builtins
[SPU_MULE_1
].fndecl
;
6256 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6258 spu_builtin_mul_widen_odd (tree type
)
6260 switch (TYPE_MODE (type
))
6263 if (TYPE_UNSIGNED (type
))
6264 return spu_builtins
[SPU_MULO_1
].fndecl
;
6266 return spu_builtins
[SPU_MULO_0
].fndecl
;
6273 /* Implement targetm.vectorize.builtin_mask_for_load. */
6275 spu_builtin_mask_for_load (void)
6277 struct spu_builtin_description
*d
= &spu_builtins
[SPU_MASK_FOR_LOAD
];
6282 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6284 spu_builtin_vectorization_cost (bool runtime_test
)
6286 /* If the branch of the runtime test is taken - i.e. - the vectorized
6287 version is skipped - this incurs a misprediction cost (because the
6288 vectorized version is expected to be the fall-through). So we subtract
6289 the latency of a mispredicted branch from the costs that are incurred
6290 when the vectorized version is executed. */
6297 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6298 after applying N number of iterations. This routine does not determine
6299 how may iterations are required to reach desired alignment. */
6302 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6307 /* All other types are naturally aligned. */
6311 /* Implement targetm.vectorize.builtin_vec_perm. */
6313 spu_builtin_vec_perm (tree type
, tree
*mask_element_type
)
6315 struct spu_builtin_description
*d
;
6317 *mask_element_type
= unsigned_char_type_node
;
6319 switch (TYPE_MODE (type
))
6322 if (TYPE_UNSIGNED (type
))
6323 d
= &spu_builtins
[SPU_SHUFFLE_0
];
6325 d
= &spu_builtins
[SPU_SHUFFLE_1
];
6329 if (TYPE_UNSIGNED (type
))
6330 d
= &spu_builtins
[SPU_SHUFFLE_2
];
6332 d
= &spu_builtins
[SPU_SHUFFLE_3
];
6336 if (TYPE_UNSIGNED (type
))
6337 d
= &spu_builtins
[SPU_SHUFFLE_4
];
6339 d
= &spu_builtins
[SPU_SHUFFLE_5
];
6343 if (TYPE_UNSIGNED (type
))
6344 d
= &spu_builtins
[SPU_SHUFFLE_6
];
6346 d
= &spu_builtins
[SPU_SHUFFLE_7
];
6350 d
= &spu_builtins
[SPU_SHUFFLE_8
];
6354 d
= &spu_builtins
[SPU_SHUFFLE_9
];
6365 /* Count the total number of instructions in each pipe and return the
6366 maximum, which is used as the Minimum Iteration Interval (MII)
6367 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6368 -2 are instructions that can go in pipe0 or pipe1. */
6370 spu_sms_res_mii (struct ddg
*g
)
6373 unsigned t
[4] = {0, 0, 0, 0};
6375 for (i
= 0; i
< g
->num_nodes
; i
++)
6377 rtx insn
= g
->nodes
[i
].insn
;
6378 int p
= get_pipe (insn
) + 2;
6384 if (dump_file
&& INSN_P (insn
))
6385 fprintf (dump_file
, "i%d %s %d %d\n",
6387 insn_data
[INSN_CODE(insn
)].name
,
6391 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
6393 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
6398 spu_init_expanders (void)
6400 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6401 * frame_pointer_needed is true. We don't know that until we're
6402 * expanding the prologue. */
6404 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6407 static enum machine_mode
6408 spu_libgcc_cmp_return_mode (void)
6411 /* For SPU word mode is TI mode so it is better to use SImode
6412 for compare returns. */
6416 static enum machine_mode
6417 spu_libgcc_shift_count_mode (void)
6419 /* For SPU word mode is TI mode so it is better to use SImode
6420 for shift counts. */
6424 /* An early place to adjust some flags after GCC has finished processing
6427 asm_file_start (void)
6429 /* Variable tracking should be run after all optimizations which
6430 change order of insns. It also needs a valid CFG. */
6431 spu_flag_var_tracking
= flag_var_tracking
;
6432 flag_var_tracking
= 0;
6434 default_file_start ();
6437 /* Implement targetm.section_type_flags. */
6439 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6441 /* .toe needs to have type @nobits. */
6442 if (strcmp (name
, ".toe") == 0)
6444 return default_section_type_flags (decl
, name
, reloc
);
6447 /* Generate a constant or register which contains 2^SCALE. We assume
6448 the result is valid for MODE. Currently, MODE must be V4SFmode and
6449 SCALE must be SImode. */
6451 spu_gen_exp2 (enum machine_mode mode
, rtx scale
)
6453 gcc_assert (mode
== V4SFmode
);
6454 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6455 if (GET_CODE (scale
) != CONST_INT
)
6457 /* unsigned int exp = (127 + scale) << 23;
6458 __vector float m = (__vector float) spu_splats (exp); */
6459 rtx reg
= force_reg (SImode
, scale
);
6460 rtx exp
= gen_reg_rtx (SImode
);
6461 rtx mul
= gen_reg_rtx (mode
);
6462 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6463 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
6464 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
6469 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6470 unsigned char arr
[16];
6471 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
6472 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6473 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6474 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
6475 return array_to_constant (mode
, arr
);