/* Copyright (C) 2006, 2007, 2008, 2009, 2010, 2011
   Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */
  /* A 16-byte type. (Implemented with V16QI_type_node) */
  /* These all correspond to intSI_type_node */
  /* These correspond to the standard types */

  SPU_BTI_MAX
};

#define V16QI_type_node          (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node           (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node           (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node           (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node           (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node           (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node  (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node  (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node  (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs.  */
static void spu_option_override (void);
static void spu_option_default_params (void);
static void spu_init_builtins (void);
static tree spu_builtin_decl (unsigned, bool);
static bool spu_scalar_mode_supported_p (enum machine_mode mode);
static bool spu_vector_mode_supported_p (enum machine_mode mode);
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
						 bool, addr_space_t);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance, sbitmap blocks);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
				    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static void spu_sched_init_global (FILE *, int, int);
static void spu_sched_init (FILE *, int, int);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags, bool *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags, bool *no_add_attrs);
static int spu_naked_function_p (tree func);
static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
				   const_tree type, bool named);
static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     const_tree type, bool named);
static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
				      const_tree type, bool named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
				      gimple_seq * pre_p, gimple_seq * post_p);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static int reg_aligned_for_addr (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static bool spu_rtx_costs (rtx x, int code, int outer_code,
			   int *total, bool speed);
static bool spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
					      addr_space_t);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
static enum machine_mode spu_addr_space_address_mode (addr_space_t);
static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
static rtx spu_addr_space_convert (rtx, tree, tree);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
static void spu_unique_section (tree, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
static void spu_trampoline_init (rtx, tree, rtx);
static void spu_conditional_register_usage (void);
static bool spu_ref_may_alias_errno (ao_ref *);
static void spu_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
				 HOST_WIDE_INT, tree);
/* Which instruction set architecture to use.  */

/* Which cpu are we tuning for.  */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);
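
/* Worked example (editorial note, not in the original source): distances
   here are in bytes, at 4 bytes per instruction.  8 insns * 4 = 32 bytes
   must separate the hint from the branch; allowing up to 2 nops to be
   inserted covers 2 * 4 = 8 of those bytes, so the compiler only needs
   to see 32 - 8 = 24 bytes of real instructions, the default above.  */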
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

static enum machine_mode spu_unwind_word_mode (void);

static enum machine_mode
spu_libgcc_cmp_return_mode (void);

static enum machine_mode
spu_libgcc_shift_count_mode (void);
/* Pointer mode for __ea references.  */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
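
/* Illustrative note (editorial, not in the original source): __ea
   pointers name the PowerPC effective-address space of the Cell host,
   so under the 64-bit __ea model EAmode resolves to DImode; only the
   32-bit model (spu_ea_model == 32) makes it SImode.  */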
/* Table of machine attributes.  */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
       affects_type_identity } */
  { "naked",       0, 0, true,  false, false, spu_handle_fndecl_attribute,
    false },
  { "spu_vector",  0, 0, false, true,  false, spu_handle_vector_attribute,
    false },
  { NULL,          0, 0, false, false, false, NULL, false }
};
/*  TARGET overrides.  */

#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P	spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P	spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START asm_file_start

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION  spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION  spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P spu_legitimate_constant_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_OPTION_DEFAULT_PARAMS
#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params

#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE spu_conditional_register_usage

#undef TARGET_REF_MAY_ALIAS_ERRNO
#define TARGET_REF_MAY_ALIAS_ERRNO spu_ref_may_alias_errno

#undef TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK spu_output_mi_thunk
#undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true

/* Variable tracking should be run after all optimizations which
   change order of insns.  It also needs a valid CFG.  */
#undef TARGET_DELAY_VARTRACK
#define TARGET_DELAY_VARTRACK true
struct gcc_target targetm = TARGET_INITIALIZER;
/* Implement TARGET_OPTION_DEFAULT_PARAMS.  */
static void
spu_option_default_params (void)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params.  */
  set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
spu_option_override (void)
{
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 4,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8 byte aligned so we correctly handle dual issue */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -march= switch", spu_arch_string);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("bad value (%s) for -mtune= switch", spu_tune_string);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static bool
spu_scalar_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case QImode:
    case HImode:
    case SImode:
    case SFmode:
    case DImode:
    case TImode:
    case DFmode:
      return true;

    default:
      return false;
    }
}

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static bool
spu_vector_mode_supported_p (enum machine_mode mode)
{
  switch (mode)
    {
    case V16QImode:
    case V8HImode:
    case V4SImode:
    case V2DImode:
    case V4SFmode:
    case V2DFmode:
      return true;

    default:
      return false;
    }
}
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
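
/* Illustrative note (editorial, not in the original source):
   (subreg:SI (reg:QI) 0) passes because both modes fit in one 4-byte
   slot, and (subreg:V4SI (reg:TI) 0) passes because both occupy a full
   16-byte register; a DImode/TImode pairing fails all three size tests
   above (8 != 16, not both <= 4, not both >= 16) and is rejected.  */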
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;

  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }

  /* If it is smaller than SI, ensure a SUBREG */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (MEM_P (src))
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address. */

      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	{
	  /* Alignment is sufficient for 1 load. */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	  start &= 7;
	  if (r0)
	    emit_insn (gen_rotqby_ti (s0, s0, r0));
	}
      else
	{
	  /* Need 2 loads. */
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);
	  start &= 7;

	  gcc_assert (start + width <= 128);
	  if (r0)
	    {
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
	      else
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
	    }
	}
    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
	emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start. */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (128 - width)
    s0 = expand_shift (RSHIFT_EXPR, TImode, s0, 128 - width, s0, unsignedp);

  emit_move_insn (dst, s0);
}
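
/* Worked example (editorial, not in the original source): extracting a
   20-bit field at bit offset 12 rotates s0 left by 12 so the field
   begins at bit 0 of the quadword, then shifts right by 128 - 20 = 108,
   leaving the field right-justified and sign- or zero-extended
   according to UNSIGNEDP.  */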
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      switch (dst_mode)
	{
	case SImode:
	  emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case DImode:
	  emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	case TImode:
	  emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	  break;
	default:
	  abort ();
	}
    }

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    default:
      abort ();
    }
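
  /* Worked example (editorial, not in the original source): for a
     TImode insert with start = 12 and width = 20, the field covers bits
     12..31, i.e. bytes 1..3 of the quadword.  arr[1] = 0xff >> 4 =
     0x0f, arr[2] = 0xff, and arr[3] keeps all 8 bits because
     (12 + 20 - 1) & 7 == 7, giving the byte mask 00 0f ff ff 00 ... 00.  */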
  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx addr1;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  addr1 = plus_constant (addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;
  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (bytes <= 0)
    return 1;

  dst = ops[0];
  src = ops[1];

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes < offset + 16)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
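
/* Editorial note (illustrative, not in the original source): the tail
   above handles a remainder of fewer than 16 bytes by building a byte
   mask with 0xff in the first (bytes - offset) positions, so selb
   merges the source bytes into the destination quadword without
   disturbing the bytes past the end of the copy.  */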
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
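
/* Editorial note (illustrative, not in the original source): rows are
   indexed by operand mode in the fixed order QI, HI, SI, DI, TI, SF,
   DF, V16QI, V8HI, V4SI, V4SF, V2DF, and columns by spu_comp_code
   (SPU_EQ, SPU_GT, SPU_GTU).  A zero entry -- e.g. an unsigned compare
   on a float mode -- means no such pattern exists, and the caller must
   reverse or otherwise synthesize the compare.  */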
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.   GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;
  int eq_test = 0;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case GE:
	    op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  comp_mode = SImode;
  op_mode = GET_MODE (op0);

  switch (code)
    {
    case GE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 0;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 1;
	  reverse_test = 1;
	}
      break;
    case LE:
      scode = SPU_GT;
      if (HONOR_NANS (op_mode))
	{
	  reverse_compare = 1;
	  reverse_test = 0;
	  eq_test = 1;
	  eq_code = SPU_EQ;
	}
      else
	{
	  reverse_compare = 0;
	  reverse_test = 1;
	}
      break;
    case LT:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GT;
      break;
    case GEU:
      reverse_compare = 1;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LEU:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_GTU;
      break;
    case LTU:
      reverse_compare = 1;
      reverse_test = 0;
      scode = SPU_GTU;
      break;
    case NE:
      reverse_compare = 0;
      reverse_test = 1;
      scode = SPU_EQ;
      break;

    case EQ:
      scode = SPU_EQ;
      break;
    case GT:
      scode = SPU_GT;
      break;
    case GTU:
      scode = SPU_GTU;
      break;
    default:
      scode = SPU_EQ;
      break;
    }

  switch (op_mode)
    {
    case QImode:
      index = 0;
      comp_mode = QImode;
      break;
    case HImode:
      index = 1;
      comp_mode = HImode;
      break;
    case SImode:
      index = 2;
      break;
    case DImode:
      index = 3;
      break;
    case TImode:
      index = 4;
      break;
    case SFmode:
      index = 5;
      break;
    case DFmode:
      index = 6;
      break;
    case V16QImode:
      index = 7;
      comp_mode = op_mode;
      break;
    case V8HImode:
      index = 8;
      comp_mode = op_mode;
      break;
    case V4SImode:
      index = 9;
      comp_mode = op_mode;
      break;
    case V4SFmode:
      index = 10;
      comp_mode = V4SImode;
      break;
    case V2DFmode:
      index = 11;
      comp_mode = V2DImode;
      break;
    default:
      abort ();
    }

  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
	  || GET_MODE (op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = op1;
	  op1 = op0;
	  op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (op0, op_mode))
	op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (op1, op_mode))
	op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 op0, op1);
      if (comp_rtx == 0)
	abort ();
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     op0, op1);
	  if (eq_rtx == 0)
	    abort ();
	  emit_insn (eq_rtx);
	  ior_code = optab_handler (ior_optab, comp_mode);
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
static HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

static rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;
  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
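
/* Worked example (editorial, not in the original source): for
   mode == SFmode and v == 0x3f800000 -- the IEEE single-precision bit
   pattern of 1.0f -- tv[0] becomes 0x3f800000 and real_from_target
   rebuilds the REAL_VALUE_TYPE for 1.0, so the returned CONST_DOUBLE
   round-trips with const_double_to_hwint above.  */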
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      else
	abort ();
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;
  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }
  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	      break;
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      abort ();
	    }
	}
      else
	abort ();
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORI:
	    case SPU_IOHL:
	      break;
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      abort ();
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      else
	abort ();
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
		break;
	      case SPU_ILA:
		fprintf (file, "a");
		break;
	      case SPU_ILH:
		fprintf (file, "h");
		break;
	      case SPU_ILHU:
		fprintf (file, "hu");
		break;
	      default:
		abort ();
	      }
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	    if (info == 1)
	      fprintf (file, "b");
	    else if (info == 2)
	      fprintf (file, "h");
	    else if (info == 4)
	      fprintf (file, "w");
	    else if (info == 8)
	      fprintf (file, "d");
	    break;
	  case IC_IL1s:
	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");
	    break;
	  default:
	    break;
	  }
      }
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_IL:
	      case SPU_ILA:
		break;
	      case SPU_ILH:
	      case SPU_ILHU:
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		break;
	      default:
		abort ();
	      }
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	    break;
	  case IC_FSMBI:
	    constant_to_array (mode, x, arr);
	    val = 0;
	    for (i = 0; i < 16; i++)
	      {
		val <<= 1;
		val |= arr[i] & 1;
	      }
	    print_operand (file, GEN_INT (val), 0);
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
	    break;
	  case IC_IL1s:
	    if (xcode == HIGH)
	      x = XEXP (x, 0);
	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	    if (xcode == HIGH)
	      fprintf (file, "@h");
	    break;
	  default:
	    break;
	  }
      }
      return;

      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions. */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	  return;
	}
      break;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  default:
	    abort ();
	  }
      else
	abort ();
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls. */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

    case 'e':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'f':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'g':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'h':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'E':			/* negate and mask */
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x7;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'F':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'G':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -val;
      val &= 0x3f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'H':
      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

    case 'v':
    case 'w':
      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;

    case 0:
      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
    pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction. */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arrhi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], mode, 0);
	else
	  to = ops[0];
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      crtl->uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    case IC_IL1:
    case IC_FSMBI:
    case IC_CPAT:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || crtl->uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it.  */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:

            +-------------+
      AP -> +-------------+
 prev SP -> | back chain  |
            +-------------+
            | reg save    |  crtl->args.pretend_args_size bytes
            +-------------+
            | saved regs  |  spu_saved_regs_size() bytes
      FP -> +-------------+
            | vars        |  get_frame_size() bytes
     HFP -> +-------------+
            | args        |  crtl->outgoing_args_size bytes
            +-------------+
      SP -> +-------------+
*/
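
/* Worked example (editorial, not in the original source): a non-leaf
   function with 32 bytes of locals, two quadword register saves (0x20
   bytes), 48 bytes of outgoing args and no pretend args gets
   total_size = 32 + 0x20 + 48 + 0 plus STACK_POINTER_OFFSET (added
   because the function is not a leaf); every component is already a
   multiple of the 16-byte quadword, so no extra rounding happens.  */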
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  if (flag_pic && optimize == 0)
    crtl->uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  if (total_size > 0)
    {
      offset = -crtl->args.pretend_args_size;
      for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	if (need_to_save_reg (regno, 1))
	  {
	    offset -= 16;
	    insn = frame_emit_store (regno, sp_reg, offset);
	    RTX_FRAME_RELATED_P (insn) = 1;
	  }
    }

  if (flag_pic && crtl->uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1) */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain. */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first. */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + crtl->outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  if (flag_stack_usage_info)
    current_function_static_stack_size = total_size;
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      emit_jump_insn (gen__return ());
    }
}

rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
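
/* Usage example (editorial, not in the original source):
   spu_const (V4SImode, 3) builds (const_vector:V4SI [3 3 3 3]), while
   spu_const (SFmode, 0x3f800000) builds the CONST_DOUBLE for 1.0f,
   since VAL is interpreted as the bit representation of the float.  */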
/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant(mode, arr);
}
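
/* Usage example (editorial, not in the original source):
   spu_const_from_ints (V16QImode, 0x00010203, 0x04050607, 0x08090a0b,
   0x0c0d0e0f) yields the big-endian byte sequence 00 01 02 ... 0f,
   which is the identity selection pattern for the shufb instruction.  */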
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block. */
};
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
		(GET_CODE(INSN) == CALL_INSN \
		 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
		 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.  */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0 or inline asm.  */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
  INSN_LOCATOR (new_insn) = INSN_LOCATOR (insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop.  */
static void
pad_bb (void)
{
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES. */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops. */
  length = 0;

  prev_insn = 0;
  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
	  || INSN_CODE (insn) == CODE_FOR_hbr)
	{
	  if (hbr_insn)
	    {
	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
		  || (a1 - a0 == 4))
		{
		  prev_insn = emit_insn_before (gen_lnop (), insn);
		  PUT_MODE (prev_insn, GET_MODE (insn));
		  PUT_MODE (insn, TImode);
		  INSN_LOCATOR (prev_insn) = INSN_LOCATOR (insn);
		  length += 4;
		}
	    }
	  hbr_insn = insn;
	}
      if (INSN_CODE (insn) == CODE_FOR_blockage)
	{
	  if (GET_MODE (insn) == TImode)
	    PUT_MODE (next_insn, TImode);
	  insn = next_insn;
	  next_insn = next_active_insn (insn);
	}
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
	{
	  if (((addr + length) & 7) != 0)
	    {
	      emit_nop_for_insn (prev_insn);
	      length += 4;
	    }
	}
      else if (GET_MODE (insn) == TImode
	       && ((next_insn && GET_MODE (next_insn) != TImode)
		   || get_attr_type (insn) == TYPE_MULTI0)
	       && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned. */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
2345 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
2346 int distance
, sbitmap blocks
)
2348 rtx branch_label
= 0;
2353 if (before
== 0 || branch
== 0 || target
== 0)
2356 /* While scheduling we require hints to be no further than 600, so
2357 we need to enforce that here too */
2361 /* If we have a Basic block note, emit it after the basic block note. */
2362 if (NOTE_INSN_BASIC_BLOCK_P (before
))
2363 before
= NEXT_INSN (before
);
2365 branch_label
= gen_label_rtx ();
2366 LABEL_NUSES (branch_label
)++;
2367 LABEL_PRESERVE_P (branch_label
) = 1;
2368 insn
= emit_label_before (branch_label
, branch
);
2369 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2370 SET_BIT (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2372 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2373 recog_memoized (hint
);
2374 INSN_LOCATOR (hint
) = INSN_LOCATOR (branch
);
2375 HINTED_P (branch
) = 1;
2377 if (GET_CODE (target
) == LABEL_REF
)
2378 HINTED_P (XEXP (target
, 0)) = 1;
2379 else if (tablejump_p (branch
, 0, &table
))
2383 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2384 vec
= XVEC (PATTERN (table
), 0);
2386 vec
= XVEC (PATTERN (table
), 1);
2387 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2388 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2391 if (distance
>= 588)
2393 /* Make sure the hint isn't scheduled any earlier than this point,
2394 which could make it too far for the branch offest to fit */
2395 insn
= emit_insn_before (gen_blockage (), hint
);
2396 recog_memoized (insn
);
2397 INSN_LOCATOR (insn
) = INSN_LOCATOR (hint
);
2399 else if (distance
<= 8 * 4)
2401 /* To guarantee at least 8 insns between the hint and branch we
2404 for (d
= distance
; d
< 8 * 4; d
+= 4)
2407 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2408 recog_memoized (insn
);
2409 INSN_LOCATOR (insn
) = INSN_LOCATOR (hint
);
2412 /* Make sure any nops inserted aren't scheduled before the hint. */
2413 insn
= emit_insn_after (gen_blockage (), hint
);
2414 recog_memoized (insn
);
2415 INSN_LOCATOR (insn
) = INSN_LOCATOR (hint
);
2417 /* Make sure any nops inserted aren't scheduled after the call. */
2418 if (CALL_P (branch
) && distance
< 8 * 4)
2420 insn
= emit_insn_before (gen_blockage (), branch
);
2421 recog_memoized (insn
);
2422 INSN_LOCATOR (insn
) = INSN_LOCATOR (branch
);
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target.  */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* jump table */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      /* ASM GOTOs. */
      if (extract_asm_operands (PATTERN (branch)) != NULL)
	return NULL;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint.  */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL.  */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
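
/* For illustration, an insn matching the test above has the shape
     (parallel [(set ...)
                (clobber (reg:SI HBR_REGNUM))])
   e.g. the hbr pattern itself, or inline asm that lists $hbr among
   its clobbers.  */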
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an even address.
   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.  */
static void
insert_hbrp_for_ilb_runout (rtx first)
{
  rtx insn, before_4 = 0, before_16 = 0;
  int addr = 0, length, first_addr = -1;
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
  int insert_lnop_after = 0;
  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	if (first_addr == -1)
	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
	length = get_attr_length (insn);

	if (before_4 == 0 && addr + length >= 4 * 4)
	  before_4 = insn;
	/* We test for 14 instructions because the first hbrp will add
	   up to 2 instructions.  */
	if (before_16 == 0 && addr + length >= 14 * 4)
	  before_16 = insn;

	if (INSN_CODE (insn) == CODE_FOR_hbr)
	  {
	    /* Make sure an hbrp is at least 2 cycles away from a hint.
	       Insert an lnop after the hbrp when necessary.  */
	    if (before_4 == 0 && addr > 0)
	      {
		before_4 = insn;
		insert_lnop_after |= 1;
	      }
	    else if (before_4 && addr <= 4 * 4)
	      insert_lnop_after |= 1;
	    if (before_16 == 0 && addr > 10 * 4)
	      {
		before_16 = insn;
		insert_lnop_after |= 2;
	      }
	    else if (before_16 && addr <= 14 * 4)
	      insert_lnop_after |= 2;
	  }

	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
	  {
	    if (addr < hbrp_addr0)
	      hbrp_addr0 = addr;
	    else if (addr < hbrp_addr1)
	      hbrp_addr1 = addr;
	  }

	if (CALL_P (insn) || JUMP_P (insn))
	  {
	    if (HINTED_P (insn))
	      return;

	    /* Any branch after the first 15 insns should be on an even
	       address to avoid a special case branch.  There might be
	       some nops and/or hbrps inserted, so we test after 10
	       insns.  */
	    if (addr > 10 * 4)
	      SCHED_ON_EVEN_P (insn) = 1;
	  }

	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
	  return;

	if (addr + length >= 32 * 4)
	  {
	    gcc_assert (before_4 && before_16);
	    if (hbrp_addr0 > 4 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
		recog_memoized (insn);
		INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_4)));
		PUT_MODE (insn, GET_MODE (before_4));
		PUT_MODE (before_4, TImode);
		if (insert_lnop_after & 1)
		  {
		    insn = emit_insn_before (gen_lnop (), before_4);
		    recog_memoized (insn);
		    INSN_LOCATOR (insn) = INSN_LOCATOR (before_4);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID (before_4)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
		&& hbrp_addr1 > 16 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
		recog_memoized (insn);
		INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_16)));
		PUT_MODE (insn, GET_MODE (before_16));
		PUT_MODE (before_16, TImode);
		if (insert_lnop_after & 2)
		  {
		    insn = emit_insn_before (gen_lnop (), before_16);
		    recog_memoized (insn);
		    INSN_LOCATOR (insn) = INSN_LOCATOR (before_16);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID
							(before_16)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    return;
	  }
      }
    else if (BARRIER_P (insn))
      return;
}
/* The SPU might hang when it executes 48 inline instructions after a
   hinted branch jumps to its hinted target.  The beginning of a
   function and the return from a call might have been hinted, and
   must be handled as well.  To prevent a hang we insert 2 hbrps.  The
   first should be within 6 insns of the branch target.  The second
   should be within 22 insns of the branch target.  When determining
   if hbrps are necessary, we look for only 32 inline instructions,
   because up to 12 nops and 4 hbrps could be inserted.  Similarly,
   when inserting new hbrps, we insert them within 4 and 16 insns of
   the target.  */
static void
insert_hbrp (void)
{
  rtx insn;
  if (TARGET_SAFE_HINTS)
    {
      shorten_branches (get_insns ());
      /* Insert hbrp at beginning of function */
      insn = next_active_insn (get_insns ());
      if (insn)
	insert_hbrp_for_ilb_runout (insn);
      /* Insert hbrp after hinted targets. */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
    }
}
static int in_spu_reorg;

static void
spu_var_tracking (void)
{
  if (flag_var_tracking)
    {
      df_analyze ();
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
      df_finish_pass (false);
    }
}
/* Insert branch hints.  There are no branch optimizations after this
   pass, so it's safe to set our branch hints now.  */
static void
spu_machine_dependent_reorg (void)
{
  sbitmap blocks;
  basic_block bb;
  rtx branch, insn;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, required_dist = 0;
  int i;
  unsigned int j;

  if (!TARGET_BRANCH_HINTS || optimize == 0)
    {
      /* We still do it for unoptimized code because an external
         function might have hinted a call or return.  */
      compute_bb_for_insn ();
      insert_hbrp ();
      pad_bb ();
      spu_var_tracking ();
      free_bb_for_insn ();
      return;
    }

  blocks = sbitmap_alloc (last_basic_block);
  sbitmap_zero (blocks);

  in_spu_reorg = 1;
  compute_bb_for_insn ();

  compact_blocks ();

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (n_basic_blocks,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths.  */
  shorten_branches (get_insns ());

  for (i = n_basic_blocks - 1; i >= 0; i--)
    {
      bb = BASIC_BLOCK (i);
      branch = 0;
      if (spu_bb_info[i].prop_jump)
	{
	  branch = spu_bb_info[i].prop_jump;
	  branch_target = get_branch_target (branch);
	  branch_addr = INSN_ADDRESSES (INSN_UID (branch));
	  required_dist = spu_hint_dist;
	}
      /* Search from end of a block to beginning.  In this loop, find
         jumps which need a branch and emit them only when:
         - it's an indirect branch and we're at the insn which sets
           the register
         - we're at an insn that will invalidate the hint.  e.g., a
           call, another hint insn, inline asm that clobbers $hbr, and
           some inlined operations (divmodsi4).  Don't consider jumps
           because they are only at the end of a block and are
           considered when we are deciding whether to propagate
         - we're getting too far away from the branch.  The hbr insns
           only have a signed 10-bit offset.
         We go back as far as possible so the branch will be considered
         for propagation when we get to the beginning of the block.  */
      for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
	{
	  if (INSN_P (insn))
	    {
	      insn_addr = INSN_ADDRESSES (INSN_UID (insn));
	      if (branch
		  && ((GET_CODE (branch_target) == REG
		       && set_of (branch_target, insn) != NULL_RTX)
		      || insn_clobbers_hbr (insn)
		      || branch_addr - insn_addr > 600))
		{
		  rtx next = NEXT_INSN (insn);
		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
		  if (insn != BB_END (bb)
		      && branch_addr - next_addr >= required_dist)
		    {
		      if (dump_file)
			fprintf (dump_file,
				 "hint for %i in block %i before %i\n",
				 INSN_UID (branch), bb->index,
				 INSN_UID (next));
		      spu_emit_branch_hint (next, branch, branch_target,
					    branch_addr - next_addr, blocks);
		    }
		  branch = 0;
		}

	      /* JUMP_P will only be true at the end of a block.  When
	         branch is already set it means we've previously decided
	         to propagate a hint for that branch into this block.  */
	      if (CALL_P (insn) || (JUMP_P (insn) && !branch))
		{
		  branch = 0;
		  if ((branch_target = get_branch_target (insn)))
		    {
		      branch = insn;
		      branch_addr = insn_addr;
		      required_dist = spu_hint_dist;
		    }
		}
	    }
	  if (insn == BB_HEAD (bb))
	    break;
	}

      if (branch)
	{
	  /* If we haven't emitted a hint for this branch yet, it might
	     be profitable to emit it in one of the predecessor blocks,
	     especially for loops.  */
	  rtx bbend;
	  basic_block prev = 0, prop = 0, prev2 = 0;
	  int loop_exit = 0, simple_loop = 0;
	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));

	  for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	    if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	      prev = EDGE_PRED (bb, j)->src;
	    else
	      prev2 = EDGE_PRED (bb, j)->src;

	  for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	    if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	      loop_exit = 1;
	    else if (EDGE_SUCC (bb, j)->dest == bb)
	      simple_loop = 1;

	  /* If this branch is a loop exit then propagate to previous
	     fallthru block.  This catches the cases when it is a simple
	     loop or when there is an initial branch into the loop.  */
	  if (prev && (loop_exit || simple_loop)
	      && prev->loop_depth <= bb->loop_depth)
	    prop = prev;

	  /* If there is only one adjacent predecessor, don't propagate
	     outside this loop.  This loop_depth test isn't perfect, but
	     I'm not sure the loop_father member is valid at this point.  */
	  else if (prev && single_pred_p (bb)
		   && prev->loop_depth == bb->loop_depth)
	    prop = prev;

	  /* If this is the JOIN block of a simple IF-THEN then
	     propagate the hint to the HEADER block.  */
	  else if (prev && prev2
		   && EDGE_COUNT (bb->preds) == 2
		   && EDGE_COUNT (prev->preds) == 1
		   && EDGE_PRED (prev, 0)->src == prev2
		   && prev2->loop_depth == bb->loop_depth
		   && GET_CODE (branch_target) != REG)
	    prop = prev;

	  /* Don't propagate when:
	     - this is a simple loop and the hint would be too far
	     - this is not a simple loop and there are 16 insns in
	       this block already
	     - the predecessor block ends in a branch that will be
	       hinted
	     - the predecessor block ends in an insn that invalidates
	       the hint */
	  if (prop
	      && prop->index >= 0
	      && (bbend = BB_END (prop))
	      && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	      (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	      && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
	    {
	      if (dump_file)
		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
			 bb->index, prop->index, bb->loop_depth,
			 INSN_UID (branch), loop_exit, simple_loop,
			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	      spu_bb_info[prop->index].prop_jump = branch;
	      spu_bb_info[prop->index].bb_index = i;
	    }
	  else if (branch_addr - next_addr >= required_dist)
	    {
	      if (dump_file)
		fprintf (dump_file, "hint for %i in block %i before %i\n",
			 INSN_UID (branch), bb->index,
			 INSN_UID (NEXT_INSN (insn)));
	      spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
				    branch_addr - next_addr, blocks);
	    }
	  branch = 0;
	}
    }
  free (spu_bb_info);

  if (!sbitmap_empty_p (blocks))
    find_many_sub_basic_blocks (blocks);

  /* We have to schedule to make sure alignment is ok.  */
  FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;

  /* The hints need to be scheduled, so call it again.  */
  schedule_insns ();
  df_finish_pass (true);

  insert_hbrp ();

  pad_bb ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
      {
	/* Adjust the LABEL_REF in a hint when we have inserted a nop
	   between its branch label and the branch.  We don't move the
	   label because GCC expects it at the beginning of the block.  */
	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
	rtx label_ref = XVECEXP (unspec, 0, 0);
	rtx label = XEXP (label_ref, 0);
	rtx branch;
	int offset = 0;
	for (branch = NEXT_INSN (label);
	     !JUMP_P (branch) && !CALL_P (branch);
	     branch = NEXT_INSN (branch))
	  if (NONJUMP_INSN_P (branch))
	    offset += get_attr_length (branch);
	if (offset > 0)
	  XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
      }

  spu_var_tracking ();

  free_bb_for_insn ();

  in_spu_reorg = 0;
}
/* Insn scheduling routines, primarily for dual issue.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
uses_ls_unit (rtx insn)
{
  rtx set = single_set (insn);
  if (set != 0
      && (GET_CODE (SET_DEST (set)) == MEM
	  || GET_CODE (SET_SRC (set)) == MEM))
    return 1;
  return 0;
}

static int
get_pipe (rtx insn)
{
  enum attr_type t;
  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
      return 0;

    case TYPE_LNOP:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_BR:
    case TYPE_MULTI1:
    case TYPE_HBR:
    case TYPE_IPREFETCH:
      return 1;
    default:
      abort ();
    }
}
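
/* Pipe 0 covers the arithmetic units and pipe 1 the load/store,
   shuffle, branch, and hint units; an even-addressed pipe-0 insn can
   dual issue with the following odd-addressed pipe-1 insn, which is
   what the alignment logic above and below arranges for.  */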
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls.  */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled.  */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder().  */
static int pipe0_clock;
static int pipe1_clock;

static int prev_clock_var;

static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb.  */
static int spu_ls_first;
static int prev_ls_clock;
static void
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		       int max_ready ATTRIBUTE_UNUSED)
{
  spu_sched_length = 0;
}

static void
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		int max_ready ATTRIBUTE_UNUSED)
{
  if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
    {
      /* When any block might be at least 8-byte aligned, assume they
         will all be at least 8-byte aligned to make sure dual issue
         works out correctly.  */
      spu_sched_length = 0;
    }
  spu_ls_first = INT_MAX;
  clock_var = -1;
  prev_ls_clock = -1;
  pipe0_clock = -1;
  pipe1_clock = -1;
  prev_clock_var = -1;
  prev_priority = -1;
}
static int
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
			  int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
{
  int len;
  int p;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || (len = get_attr_length (insn)) == 0)
    return more;

  spu_sched_length += len;

  /* Reset on inline asm */
  if (INSN_CODE (insn) == -1)
    {
      spu_ls_first = INT_MAX;
      pipe0_clock = -1;
      pipe1_clock = -1;
      return 0;
    }
  p = get_pipe (insn);
  if (p == 0)
    pipe0_clock = clock_var;
  else
    pipe1_clock = clock_var;

  if (in_spu_reorg)
    {
      if (clock_var - prev_ls_clock > 1
	  || INSN_CODE (insn) == CODE_FOR_iprefetch)
	spu_ls_first = INT_MAX;
      if (uses_ls_unit (insn))
	{
	  if (spu_ls_first == INT_MAX)
	    spu_ls_first = spu_sched_length;
	  prev_ls_clock = clock_var;
	}

      /* The scheduler hasn't inserted the nop, but we will later on.
         Include those nops in spu_sched_length.  */
      if (prev_clock_var == clock_var && (spu_sched_length & 7))
	spu_sched_length += 4;
      prev_clock_var = clock_var;

      /* more is -1 when called from spu_sched_reorder for new insns
         that don't have INSN_PRIORITY */
      if (more >= 0)
	prev_priority = INSN_PRIORITY (insn);
    }

  /* Always try issuing more insns.  spu_sched_reorder will decide
     when the cycle should be advanced.  */
  return 1;
}
/* This function is called for both TARGET_SCHED_REORDER and
   TARGET_SCHED_REORDER2.  */
static int
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		   rtx *ready, int *nreadyp, int clock)
{
  int i, nready = *nreadyp;
  int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
  rtx insn;

  clock_var = clock;

  if (nready <= 0 || pipe1_clock >= clock)
    return 0;

  /* Find any rtl insns that don't generate assembly insns and schedule
     them first.  */
  for (i = nready - 1; i >= 0; i--)
    {
      insn = ready[i];
      if (INSN_CODE (insn) == -1
	  || INSN_CODE (insn) == CODE_FOR_blockage
	  || (INSN_P (insn) && get_attr_length (insn) == 0))
	{
	  ready[i] = ready[nready - 1];
	  ready[nready - 1] = insn;
	  return 1;
	}
    }

  pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
  for (i = 0; i < nready; i++)
    if (INSN_CODE (ready[i]) != -1)
      {
	insn = ready[i];
	switch (get_attr_type (insn))
	  {
	  default:
	  case TYPE_MULTI0:
	  case TYPE_CONVERT:
	  case TYPE_FX2:
	  case TYPE_FX3:
	  case TYPE_SPR:
	  case TYPE_NOP:
	  case TYPE_FXB:
	  case TYPE_FPD:
	  case TYPE_FP6:
	  case TYPE_FP7:
	    pipe_0 = i;
	    break;
	  case TYPE_LOAD:
	  case TYPE_STORE:
	    pipe_ls = i;
	  case TYPE_LNOP:
	  case TYPE_SHUF:
	  case TYPE_BR:
	  case TYPE_MULTI1:
	  case TYPE_HBR:
	    pipe_1 = i;
	    break;
	  case TYPE_IPREFETCH:
	    pipe_hbrp = i;
	    break;
	  }
      }

  /* In the first scheduling phase, schedule loads and stores together
     to increase the chance they will get merged during postreload CSE.  */
  if (!reload_completed && pipe_ls >= 0)
    {
      insn = ready[pipe_ls];
      ready[pipe_ls] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }

  /* If there is an hbrp ready, prefer it over other pipe 1 insns.  */
  if (pipe_hbrp >= 0)
    pipe_1 = pipe_hbrp;

  /* When we have loads/stores in every cycle of the last 15 insns and
     we are about to schedule another load/store, emit an hbrp insn
     instead.  */
  if (in_spu_reorg
      && spu_sched_length - spu_ls_first >= 4 * 15
      && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
    {
      insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
      recog_memoized (insn);
      if (pipe0_clock < clock)
	PUT_MODE (insn, TImode);
      spu_sched_variable_issue (file, verbose, insn, -1);
      return 0;
    }

  /* In general, we want to emit nops to increase dual issue, but dual
     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
     make a good guess, but it isn't perfect so -mdual-nops=n can be
     used to tune it.  */
  if (in_spu_reorg && spu_dual_nops < 10)
    {
      /* When we are at an even address and we are not issuing nops to
         improve scheduling then we need to advance the cycle.  */
      if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
	  && (spu_dual_nops == 0
	      || (pipe_1 != -1
		  && prev_priority >
		  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
	return 0;

      /* When at an odd address, schedule the highest priority insn
         without considering pipeline.  */
      if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
	  && (spu_dual_nops == 0
	      || (prev_priority >
		  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
	return 1;
    }

  /* We haven't issued a pipe0 insn yet this cycle, if there is a
     pipe0 insn in the ready list, schedule it.  */
  if (pipe0_clock < clock && pipe_0 >= 0)
    schedule_i = pipe_0;

  /* Either we've scheduled a pipe0 insn already or there is no pipe0
     insn to schedule.  Put a pipe1 insn at the front of the ready list.  */
  else
    schedule_i = pipe_1;

  if (schedule_i > -1)
    {
      insn = ready[schedule_i];
      ready[schedule_i] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }
  return 0;
}
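
/* The value returned by the reorder hook above is the number of insns
   the scheduler may still issue in the current cycle, so returning 0
   forces the cycle to advance while returning 1 lets the insn moved to
   the end of the ready list issue next.  */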
/* INSN is dependent on DEP_INSN.  */
static int
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  rtx set;

  /* The blockage pattern is used to prevent instructions from being
     moved across it and has no cost.  */
  if (INSN_CODE (insn) == CODE_FOR_blockage
      || INSN_CODE (dep_insn) == CODE_FOR_blockage)
    return 0;

  if ((INSN_P (insn) && get_attr_length (insn) == 0)
      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
    return 0;

  /* Make sure hbrps are spread out.  */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 8;

  /* Make sure hints and hbrps are 2 cycles apart.  */
  if ((INSN_CODE (insn) == CODE_FOR_iprefetch
       || INSN_CODE (insn) == CODE_FOR_hbr)
      && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
	  || INSN_CODE (dep_insn) == CODE_FOR_hbr))
    return 2;

  /* An hbrp has no real dependency on other insns.  */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 0;

  /* Assuming that it is unlikely an argument register will be used in
     the first cycle of the called function, we reduce the cost for
     slightly better scheduling of dep_insn.  When not hinted, the
     mispredicted branch would hide the cost as well.  */
  if (CALL_P (insn))
    {
      rtx target = get_branch_target (insn);
      if (GET_CODE (target) != REG || !set_of (target, insn))
	return cost - 2;
      return cost;
    }

  /* And when returning from a function, let's assume the return values
     are completed sooner too.  */
  if (CALL_P (dep_insn))
    return cost - 2;

  /* Make sure an instruction that loads from the back chain is scheduled
     away from the return instruction so a hint is more likely to get
     issued.  */
  if (INSN_CODE (insn) == CODE_FOR__return
      && (set = single_set (dep_insn))
      && GET_CODE (SET_DEST (set)) == REG
      && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
    return 20;

  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier.  */
  if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;

  return cost;
}
/* Create a CONST_DOUBLE from a string.  */
rtx
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}
int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
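
/* Examples: 0x1234 fits il; 0x23456 fits ila; 0x12341234 has equal
   halfwords and fits ilh; 0x12340000 has a zero low halfword and fits
   ilhu.  */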
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl.  */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
	|| (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.  */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
	if (!run)
	  {
	    start = i;
	    if (arr[i] == 3)
	      run = 1;
	    else if (arr[i] == 2 && arr[i+1] == 3)
	      run = 2;
	    else if (arr[i] == 0)
	      {
		while (arr[i+run] == run && i+run < 16)
		  run++;
		if (run != 4 && run != 8)
		  cpat = 0;
	      }
	    else
	      cpat = 0;
	    if ((i & (run-1)) != 0)
	      cpat = 0;
	    i += run;
	  }
	else
	  cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
	run = 1;
      if (prun)
	*prun = run;
      if (pstart)
	*pstart = start == -1 ? 16-run : start;
      return 1;
    }
  return 0;
}
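
/* Illustration: a cwd-style pattern that inserts a word at offset 4 of
   a quadword looks like
     10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
   i.e. arr[i] == i + 16 everywhere except an aligned run of 0..3,
   which is exactly the shape the loop above recognizes.  */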
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT.  */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi, repeat;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok.  */
  if (!flag_pic
      && mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
    op = CONST_VECTOR_ELT (op, 0);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      /* We can never know if the resulting address fits in 18 bits and can be
	 loaded with ila.  For now, assume the address will not overflow if
	 the displacement is "small" (fits 'K' constraint).  */
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
	{
	  rtx sym = XEXP (XEXP (op, 0), 0);
	  rtx cst = XEXP (XEXP (op, 0), 1);

	  if (GET_CODE (sym) == SYMBOL_REF
	      && GET_CODE (cst) == CONST_INT
	      && satisfies_constraint_K (cst))
	    return IC_IL1s;
	}
      return IC_IL2s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
	  return IC_POOL;
      /* Fall through.  */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical.  */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
	for (j = 0; j < 4; j++)
	  if (arr[j] != arr[i + j])
	    repeated = 0;

      if (repeated)
	{
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);

	  if (which_immediate_load (val) != SPU_NONE)
	    return IC_IL1;
	}

      /* Any mode of 2 bytes or smaller can be loaded with an il
         instruction.  */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      repeat = 0;
      for (i = 0; i < 16 && fsmbi; i++)
	if (arr[i] != 0 && repeat == 0)
	  repeat = arr[i];
	else if (arr[i] != 0 && arr[i] != repeat)
	  fsmbi = 0;
      if (fsmbi)
	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
	return IC_CPAT;

      if (repeated)
	return IC_IL2;

      return IC_POOL;
    default:
      break;
    }
  gcc_unreachable ();
}
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
	return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
	{
	  val = trunc_int_for_mode (val, QImode);
	  if (val >= -0x200 && val <= 0x1ff)
	    return SPU_ORBI;
	}
    }
  return SPU_NONE;
}
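
/* Example: which_logical_immediate (0x00120012) returns SPU_ORHI; the
   two halfwords match and the repeated value 0x12 fits the signed
   10-bit immediate of orhi.  */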
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs.  */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
int
arith_immediate_p (rtx op, enum machine_mode mode,
		   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
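
/* Example: a V4SI vector that splats the value 100 passes
   arith_immediate_p (op, V4SImode, -0x200, 0x1ff); all four words
   repeat and 100 fits the signed 10-bit range used by ai and friends.  */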
/* TRUE when op is an immediate and an exact power of 2, and given that
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
   all entries must be the same.  */
int
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
{
  enum machine_mode int_mode;
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, int_mode);

  /* Currently, we only handle SFmode */
  gcc_assert (mode == SFmode);
  if (mode == SFmode)
    {
      int exp = (val >> 23) - 127;
      return val > 0 && (val & 0x007fffff) == 0
	&& exp >= low && exp <= high;
    }
  return 0;
}
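
/* Example: SFmode 2.0 has the bit pattern 0x40000000; its mantissa
   bits are zero and (0x40000000 >> 23) - 127 == 1, so it is accepted
   whenever 1 lies within [low, high].  */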
/* Return true if X is a SYMBOL_REF to an __ea qualified variable.  */
static int
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;
  tree decl;

  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
    {
      rtx plus = XEXP (x, 0);
      rtx op0 = XEXP (plus, 0);
      rtx op1 = XEXP (plus, 1);
      if (GET_CODE (op1) == CONST_INT)
	x = op0;
    }

  return (GET_CODE (x) == SYMBOL_REF
	  && (decl = SYMBOL_REF_DECL (x)) != 0
	  && TREE_CODE (decl) == VAR_DECL
	  && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
}
/* We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match.  */
static bool
spu_legitimate_constant_p (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Reject any __ea qualified reference.  These can't appear in
     instructions but must be forced to the constant pool.  */
  if (for_each_rtx (&x, ea_symbol_ref, 0))
    return 0;

  /* V4SI with all identical symbols is valid.  */
  if (!flag_pic
      && mode == V4SImode
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
	   && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
	   && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);

  if (GET_CODE (x) == CONST_VECTOR
      && !const_vector_immediate_p (x))
    return 0;
  return 1;
}
/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const_int, where const_int is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  We only care about
   16 byte modes because the expand phase will change all smaller MEM
   references to TImode.  */
static bool
spu_legitimate_address_p (enum machine_mode mode,
			  rtx x, bool reg_ok_strict)
{
  int aligned = GET_MODE_SIZE (mode) >= 16;
  if (aligned
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
    x = XEXP (x, 0);
  switch (GET_CODE (x))
    {
    case LABEL_REF:
      return !TARGET_LARGE_MEM;

    case SYMBOL_REF:
    case CONST:
      /* Keep __ea references until reload so that spu_expand_mov can see them
	 in MEMs.  */
      if (ea_symbol_ref (&x, 0))
	return !reload_in_progress && !reload_completed;
      return !TARGET_LARGE_MEM;

    case CONST_INT:
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

    case SUBREG:
      x = XEXP (x, 0);
      if (!REG_P (x))
	return 0;
      /* FALLTHRU */

    case REG:
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

    case PLUS:
    case LO_SUM:
      {
	rtx op0 = XEXP (x, 0);
	rtx op1 = XEXP (x, 1);
	if (GET_CODE (op0) == SUBREG)
	  op0 = XEXP (op0, 0);
	if (GET_CODE (op1) == SUBREG)
	  op1 = XEXP (op1, 0);
	if (GET_CODE (op0) == REG
	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
	    && GET_CODE (op1) == CONST_INT
	    && INTVAL (op1) >= -0x2000
	    && INTVAL (op1) <= 0x1fff
	    && (!aligned || (INTVAL (op1) & 15) == 0))
	  return true;
	if (GET_CODE (op0) == REG
	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
	    && GET_CODE (op1) == REG
	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
	  return true;
      }
      break;

    default:
      break;
    }
  return false;
}
/* Like spu_legitimate_address_p, except with named addresses.  */
static bool
spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
				     bool reg_ok_strict, addr_space_t as)
{
  if (as == ADDR_SPACE_EA)
    return (REG_P (x) && (GET_MODE (x) == EAmode));

  else if (as != ADDR_SPACE_GENERIC)
    gcc_unreachable ();

  return spu_legitimate_address_p (mode, x, reg_ok_strict);
}
/* When the address is reg + const_int, force the const_int into a
   register.  */
static rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx op0, op1;
  /* Make sure both operands are registers.  */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (ALIGNED_SYMBOL_REF_P (op0))
	{
	  op0 = force_reg (Pmode, op0);
	  mark_reg_pointer (op0, 128);
	}
      else if (GET_CODE (op0) != REG)
	op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
	{
	  op1 = force_reg (Pmode, op1);
	  mark_reg_pointer (op1, 128);
	}
      else if (GET_CODE (op1) != REG)
	op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
    }
  return x;
}
/* Like spu_legitimate_address, except with named address support.  */
static rtx
spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
				   addr_space_t as)
{
  if (as != ADDR_SPACE_GENERIC)
    return x;

  return spu_legitimize_address (x, oldx, mode);
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */
static tree
spu_handle_fndecl_attribute (tree * node,
			     tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (0, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle the "vector" attribute.  */
static tree
spu_handle_vector_attribute (tree * node, tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  enum machine_mode mode;
  int unsigned_p;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  unsigned_p = TYPE_UNSIGNED (type);
  switch (mode)
    {
    case DImode:
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
      break;
    case SImode:
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
      break;
    case HImode:
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
      break;
    case QImode:
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
      break;
    case SFmode:
      result = V4SF_type_node;
      break;
    case DFmode:
      result = V2DF_type_node;
      break;
    default:
      break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;		/* No need to hang on to the attribute.  */

  if (!result)
    warning (0, "%qE attribute ignored", name);
  else
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* Return nonzero if FUNC is a naked function.  */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  if (!current_function_is_leaf || crtl->outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return get_frame_size () + crtl->outgoing_args_size + sp_offset;
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size ();
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + crtl->outgoing_args_size
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;
  else
    gcc_unreachable ();
}
rtx
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = TYPE_MODE (type);
  int byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* Make sure small structs are left justified in a register.  */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
    {
      enum machine_mode smode;
      rtvec v;
      int i;
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      int n = byte_size / UNITS_PER_WORD;
      v = rtvec_alloc (nregs);
      for (i = 0; i < n; i++)
	{
	  RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
						gen_rtx_REG (TImode,
							     FIRST_RETURN_REGNUM
							     + i),
						GEN_INT (UNITS_PER_WORD * i));
	  byte_size -= UNITS_PER_WORD;
	}

      if (n < nregs)
	{
	  smode =
	    smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
	  RTVEC_ELT (v, n) =
	    gen_rtx_EXPR_LIST (VOIDmode,
			       gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
			       GEN_INT (UNITS_PER_WORD * n));
	}
      return gen_rtx_PARALLEL (mode, v);
    }
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}
static rtx
spu_function_arg (CUMULATIVE_ARGS *cum,
		  enum machine_mode mode,
		  const_tree type, bool named ATTRIBUTE_UNUSED)
{
  int byte_size;

  if (*cum >= MAX_REGISTER_ARGS)
    return 0;

  byte_size = ((mode == BLKmode)
	       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* The ABI does not allow parameters to be passed partially in
     reg and partially in stack.  */
  if ((*cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
    return 0;

  /* Make sure small structs are left justified in a register.  */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size < UNITS_PER_WORD && byte_size > 0)
    {
      enum machine_mode smode;
      rtx gr_reg;
      if (byte_size < 4)
	byte_size = 4;
      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				  gen_rtx_REG (smode, FIRST_ARG_REGNUM + *cum),
				  const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
    }
  else
    return gen_rtx_REG (mode, FIRST_ARG_REGNUM + *cum);
}
static void
spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
			  const_tree type, bool named ATTRIBUTE_UNUSED)
{
  *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	   ? 1
	   : mode == BLKmode
	   ? ((int_size_in_bytes (type) + 15) / 16)
	   : mode == VOIDmode
	   ? 1
	   : HARD_REGNO_NREGS (cum, mode));
}
/* Variable sized types are passed by reference.  */
static bool
spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Var args.  */

/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
            void *__args __attribute__((__aligned(16)));
            void *__skip __attribute__((__aligned(16)));

        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes.  */
static tree
spu_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  type_decl =
    build_decl (BUILTINS_LOCATION,
		TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_args = build_decl (BUILTINS_LOCATION,
		       FIELD_DECL, get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (BUILTINS_LOCATION,
		       FIELD_DECL, get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;
  DECL_ALIGN (f_args) = 128;
  DECL_USER_ALIGN (f_args) = 1;

  DECL_FIELD_CONTEXT (f_skip) = record;
  DECL_ALIGN (f_skip) = 128;
  DECL_USER_ALIGN (f_skip) = 1;

  TYPE_STUB_DECL (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  DECL_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it too.  It is an internal
     type so hide the warnings from the user.  */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     crtl->args.info;
       the CUMULATIVE_ARGS for this function

     crtl->args.arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer).  */

static void
spu_va_start (tree valist, rtx nextarg)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = DECL_CHAIN (f_args);

  valist = build_simple_mem_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area.  */
  t = make_tree (TREE_TYPE (args), nextarg);
  if (crtl->args.pretend_args_size > 0)
    t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
		size_int (-STACK_POINTER_OFFSET));
  t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area.  */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
	      size_int (crtl->args.pretend_args_size
			- STACK_POINTER_OFFSET));
  t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Gimplify va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 15) & -16;
    if (VALIST.__args + paddedsize > VALIST.__skip
	&& VALIST.__args <= VALIST.__skip)
      addr = VALIST.__skip + 32;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    ret = *(TYPE *)addr;  */
static tree
spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
			  gimple_seq * post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree paddedsize, addr, tmp;
  bool pass_by_reference_p;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = DECL_CHAIN (f_args);

  valist = build_simple_mem_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  addr = create_tmp_var (ptr_type_node, "va_arg");

  /* if an object is dynamically sized, a pointer to it is passed
     instead of the object itself.  */
  pass_by_reference_p = pass_by_reference (NULL, TYPE_MODE (type), type,
					   false);
  if (pass_by_reference_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* build conditional expression to calculate addr. The expression
     will be gimplified later.  */
  paddedsize = size_int (rsize);
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
		build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
		build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
			unshare_expr (skip)));

  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
		build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
			size_int (32)), unshare_expr (args));

  gimplify_assign (addr, tmp, pre_p);

  /* update VALIST.__args */
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
  gimplify_assign (unshare_expr (args), tmp, pre_p);

  addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
		       addr);

  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}
/* Save parameter registers starting with the register that corresponds
   to the first unnamed parameters.  If the first unnamed parameter is
   in the stack then save no registers.  Set pretend_args_size to the
   amount of space needed to save the registers.  */
static void
spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
			    tree type, int *pretend_size, int no_rtl)
{
  if (!no_rtl)
    {
      rtx tmp;
      int regno;
      int offset;
      int ncum = *cum;

      /* cum currently points to the last named argument, we want to
         start at the next argument.  */
      spu_function_arg_advance (&ncum, mode, type, true);

      offset = -STACK_POINTER_OFFSET;
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
	{
	  tmp = gen_frame_mem (V4SImode,
			       plus_constant (virtual_incoming_args_rtx,
					      offset));
	  emit_move_insn (tmp,
			  gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
	  offset += 16;
	}
      *pretend_size = offset + STACK_POINTER_OFFSET;
    }
}
static void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}
/* This is called any time we inspect the alignment of a register for
   addresses.  */
static int
reg_aligned_for_addr (rtx x)
{
  int regno =
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
  return REGNO_POINTER_ALIGN (regno) >= 128;
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1.  */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
/* Return TRUE if we are certain the mem refers to a complete object
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
   would make it safe to store with a single instruction.
   We guarantee the alignment and padding for static objects by aligning
   all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
   FIXME: We currently cannot guarantee this for objects on the stack
   because assign_parm_setup_stack calls assign_stack_local with the
   alignment of the parameter mode and in that case the alignment never
   gets adjusted by LOCAL_ALIGNMENT.  */
static int
store_with_one_insn_p (rtx mem)
{
  enum machine_mode mode = GET_MODE (mem);
  rtx addr = XEXP (mem, 0);
  if (mode == BLKmode)
    return 0;
  if (GET_MODE_SIZE (mode) >= 16)
    return 1;
  /* Only static objects.  */
  if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* We use the associated declaration to make sure the access is
         referring to the whole object.
         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
         if it is necessary.  Will there be cases where one exists, and
         the other does not?  Will there be cases where both exist, but
         have different types?  */
      tree decl = MEM_EXPR (mem);
      if (decl
	  && TREE_CODE (decl) == VAR_DECL
	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
	return 1;
      decl = SYMBOL_REF_DECL (addr);
      if (decl
	  && TREE_CODE (decl) == VAR_DECL
	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
	return 1;
    }
  return 0;
}
/* Return 1 when the address is not valid for a simple load and store as
   required by the '_mov*' patterns.  We could make this less strict
   for loads, but we prefer mem's to look the same so they are more
   likely to be merged.  */
static int
address_needs_split (rtx mem)
{
  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
	  || !(store_with_one_insn_p (mem)
	       || mem_is_padded_component_ref (mem))))
    return 1;

  return 0;
}
static GTY(()) rtx cache_fetch;		  /* __cache_fetch function */
static GTY(()) rtx cache_fetch_dirty;	  /* __cache_fetch_dirty function */
static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */

/* MEM is known to be an __ea qualified memory access.  Emit a call to
   fetch the ppu memory to local store, and return its address in local
   store.  */

static void
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
  if (is_store)
    {
      rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
      if (!cache_fetch_dirty)
	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
      emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
			       2, ea_addr, EAmode, ndirty, SImode);
    }
  else
    {
      if (!cache_fetch)
	cache_fetch = init_one_libfunc ("__cache_fetch");
      emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
			       1, ea_addr, EAmode);
    }
}
4433 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4434 dirty bit marking, inline.
4436 The cache control data structure is an array of
4438 struct __cache_tag_array
4440 unsigned int tag_lo[4];
4441 unsigned int tag_hi[4];
4442 void *data_pointer[4];
4444 vector unsigned short dirty_bits[4];
4448 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4452 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4453 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4454 rtx index_mask
= gen_reg_rtx (SImode
);
4455 rtx tag_arr
= gen_reg_rtx (Pmode
);
4456 rtx splat_mask
= gen_reg_rtx (TImode
);
4457 rtx splat
= gen_reg_rtx (V4SImode
);
4458 rtx splat_hi
= NULL_RTX
;
4459 rtx tag_index
= gen_reg_rtx (Pmode
);
4460 rtx block_off
= gen_reg_rtx (SImode
);
4461 rtx tag_addr
= gen_reg_rtx (Pmode
);
4462 rtx tag
= gen_reg_rtx (V4SImode
);
4463 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4464 rtx cache_tag_hi
= NULL_RTX
;
4465 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4466 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4467 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4468 rtx tag_equal_hi
= NULL_RTX
;
4469 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4470 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4471 rtx eq_index
= gen_reg_rtx (SImode
);
4472 rtx bcomp
, hit_label
, hit_ref
, cont_label
, insn
;
4474 if (spu_ea_model
!= 32)
4476 splat_hi
= gen_reg_rtx (V4SImode
);
4477 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4478 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4481 emit_move_insn (index_mask
, plus_constant (tag_size_sym
, -128));
4482 emit_move_insn (tag_arr
, tag_arr_sym
);
4483 v
= 0x0001020300010203LL
;
4484 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4485 ea_addr_si
= ea_addr
;
4486 if (spu_ea_model
!= 32)
4487 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4489 /* tag_index = ea_addr & (tag_array_size - 128) */
4490 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4492 /* splat ea_addr to all 4 slots. */
4493 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4494 /* Similarly for high 32 bits of ea_addr. */
4495 if (spu_ea_model
!= 32)
4496 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4498 /* block_off = ea_addr & 127 */
4499 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4501 /* tag_addr = tag_arr + tag_index */
4502 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4504 /* Read cache tags. */
4505 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4506 if (spu_ea_model
!= 32)
4507 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4508 plus_constant (tag_addr
, 16)));
4510 /* tag = ea_addr & -128 */
4511 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4513 /* Read all four cache data pointers. */
4514 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4515 plus_constant (tag_addr
, 32)));
4518 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4519 if (spu_ea_model
!= 32)
4521 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4522 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4525 /* At most one of the tags compare equal, so tag_equal has one
4526 32-bit slot set to all 1's, with the other slots all zero.
4527 gbb picks off low bit from each byte in the 128-bit registers,
4528 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4530 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4531 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4533 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4534 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4536 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4537 (rotating eq_index mod 16 bytes). */
4538 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4539 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4541 /* Add block offset to form final data address. */
4542 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4544 /* Check that we did hit. */
4545 hit_label
= gen_label_rtx ();
4546 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4547 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4548 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4549 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4551 /* Say that this branch is very likely to happen. */
4552 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4553 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (v
));
4555 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4556 cont_label
= gen_label_rtx ();
4557 emit_jump_insn (gen_jump (cont_label
));
4560 emit_label (hit_label
);
4565 rtx dirty_bits
= gen_reg_rtx (TImode
);
4566 rtx dirty_off
= gen_reg_rtx (SImode
);
4567 rtx dirty_128
= gen_reg_rtx (TImode
);
4568 rtx neg_block_off
= gen_reg_rtx (SImode
);
4570 /* Set up mask with one dirty bit per byte of the mem we are
4571 writing, starting from top bit. */
4573 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4574 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4579 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4581 /* Form index into cache dirty_bits. eq_index is one of
4582 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4583 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4584 offset to each of the four dirty_bits elements. */
4585 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4587 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4589 /* Rotate bit mask to proper bit. */
4590 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4591 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4592 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4594 /* Or in the new dirty bits. */
4595 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4598 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4601 emit_label (cont_label
);
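/* Worked example of the hit computation above (illustrative only): if
   the matching tag is in slot 1, tag_equal is
   { 0x00000000, 0xffffffff, 0x00000000, 0x00000000 }.  gbb packs the
   low bit of every byte, giving tag_eq_pack = 0x0f00, so
   clz (0x00000f00) = 20 and eq_index = 20.  rotqby then rotates by
   20 mod 16 = 4 bytes, moving the slot-1 cache pointer into slot 0,
   and eq_index << 2 = 0x50 is exactly the offset of slot 1's dirty
   bits within the tag array entry.  */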
static rtx
expand_ea_mem (rtx mem, bool is_store)
{
  rtx ea_addr;
  rtx data_addr = gen_reg_rtx (Pmode);
  rtx new_mem;

  ea_addr = force_reg (EAmode, XEXP (mem, 0));
  if (optimize_size || optimize == 0)
    ea_load_store (mem, is_store, ea_addr, data_addr);
  else
    ea_load_store_inline (mem, is_store, ea_addr, data_addr);

  if (ea_alias_set == -1)
    ea_alias_set = new_alias_set ();

  /* We generate a new MEM RTX to refer to the copy of the data
     in the cache.  We do not copy memory attributes (except the
     alignment) from the original MEM, as they may no longer apply
     to the cache copy.  */
  new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
  set_mem_alias_set (new_mem, ea_alias_set);
  set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));

  return new_mem;
}
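/* Illustrative sketch (names here are for exposition only): for user
   code such as

     __ea int *p;
     ...
     x = *p;

   the MEM for *p reaches spu_expand_mov with a nonzero address space,
   expand_ea_mem rewrites it as above, and the actual load is then done
   through new_mem, i.e. from the local-store copy maintained by the
   software cache rather than from the effective address itself.  */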
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
    {
      /* Perform the move in the destination SUBREG's inner mode.  */
      ops[0] = SUBREG_REG (ops[0]);
      mode = GET_MODE (ops[0]);
      ops[1] = gen_lowpart_common (mode, ops[1]);
      gcc_assert (ops[1]);
    }

  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
    {
      rtx from = SUBREG_REG (ops[1]);
      enum machine_mode imode = int_mode_for_mode (GET_MODE (from));

      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
                  && GET_MODE_CLASS (imode) == MODE_INT
                  && subreg_lowpart_p (ops[1]));

      if (GET_MODE_SIZE (imode) < 4)
        imode = SImode;
      if (imode != GET_MODE (from))
        from = gen_rtx_SUBREG (imode, from, 0);

      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
        {
          enum insn_code icode = convert_optab_handler (trunc_optab,
                                                        mode, imode);
          emit_insn (GEN_FCN (icode) (ops[0], from));
        }
      else
        emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
      return 1;
    }

  /* At least one of the operands needs to be a register.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    {
      rtx temp = force_reg (mode, ops[1]);
      emit_move_insn (ops[0], temp);
      return 1;
    }
  if (reload_in_progress || reload_completed)
    {
      if (CONSTANT_P (ops[1]))
        return spu_split_immediate (ops);
      return 0;
    }

  /* Catch the SImode immediates greater than 0x7fffffff, and sign
     extend them.  */
  if (GET_CODE (ops[1]) == CONST_INT)
    {
      HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
      if (val != INTVAL (ops[1]))
        {
          emit_move_insn (ops[0], GEN_INT (val));
          return 1;
        }
    }
  if (MEM_P (ops[0]))
    {
      if (MEM_ADDR_SPACE (ops[0]))
        ops[0] = expand_ea_mem (ops[0], true);
      return spu_split_store (ops);
    }
  if (MEM_P (ops[1]))
    {
      if (MEM_ADDR_SPACE (ops[1]))
        ops[1] = expand_ea_mem (ops[1], false);
      return spu_split_load (ops);
    }

  return 0;
}
static void
spu_convert_move (rtx dst, rtx src)
{
  enum machine_mode mode = GET_MODE (dst);
  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode),
                                              MODE_INT, 0);
  rtx reg;
  gcc_assert (GET_MODE (src) == TImode);
  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
  emit_insn (gen_rtx_SET (VOIDmode, reg,
                          gen_rtx_TRUNCATE (int_mode,
                                            gen_rtx_LSHIFTRT (TImode, src,
                                                              GEN_INT (int_mode == DImode ? 64 : 96)))));
  if (int_mode != mode)
    {
      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
      emit_move_insn (dst, reg);
    }
}
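/* Example of the shift amounts used above: an SImode scalar lives in
   bytes 0-3 (the preferred slot) of a TImode register, so shifting the
   TImode value right by 96 bits moves those bytes into the low 32 bits,
   which the TRUNCATE extracts; for a DImode scalar (bytes 0-7) the
   required shift is 64.  */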
/* Load TImode values into DST0 and DST1 (when it is non-NULL) using
   the address from SRC and SRC+16.  Return a REG or CONST_INT that
   specifies how many bytes to rotate the loaded registers, plus any
   extra from EXTRA_ROTBY.  The address and rotate amounts are
   normalized to improve merging of loads and rotate computations.  */
static rtx
spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
{
  rtx addr = XEXP (src, 0);
  rtx p0, p1, rot, addr0, addr1;
  int rot_amt;

  rot = 0;
  rot_amt = 0;

  if (MEM_ALIGN (src) >= 128)
    /* Address is already aligned; simply perform a TImode load.  */ ;
  else if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx
         aligned reg   + unaligned reg   => lqx, rotqby
         aligned reg   + aligned const   => lqd
         aligned reg   + unaligned const => lqd, rotqbyi
         unaligned reg + aligned reg     => lqx, rotqby
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
         unaligned reg + aligned const   => lqd, rotqby
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      p0 = XEXP (addr, 0);
      p1 = XEXP (addr, 1);
      if (!reg_aligned_for_addr (p0))
        {
          if (REG_P (p1) && !reg_aligned_for_addr (p1))
            {
              rot = gen_reg_rtx (SImode);
              emit_insn (gen_addsi3 (rot, p0, p1));
            }
          else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              if (INTVAL (p1) > 0
                  && REG_POINTER (p0)
                  && INTVAL (p1) * BITS_PER_UNIT
                     < REGNO_POINTER_ALIGN (REGNO (p0)))
                {
                  rot = gen_reg_rtx (SImode);
                  emit_insn (gen_addsi3 (rot, p0, p1));
                  addr = p0;
                }
              else
                {
                  rtx x = gen_reg_rtx (SImode);
                  emit_move_insn (x, p1);
                  if (!spu_arith_operand (p1, SImode))
                    p1 = x;
                  rot = gen_reg_rtx (SImode);
                  emit_insn (gen_addsi3 (rot, p0, p1));
                  addr = gen_rtx_PLUS (Pmode, p0, x);
                }
            }
          else
            rot = p0;
        }
      else
        {
          if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
            {
              rot_amt = INTVAL (p1) & 15;
              if (INTVAL (p1) & -16)
                {
                  p1 = GEN_INT (INTVAL (p1) & -16);
                  addr = gen_rtx_PLUS (SImode, p0, p1);
                }
              else
                addr = p0;
            }
          else if (REG_P (p1) && !reg_aligned_for_addr (p1))
            rot = p1;
        }
    }
  else if (REG_P (addr))
    {
      if (!reg_aligned_for_addr (addr))
        rot = addr;
    }
  else if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
          && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
          && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
        {
          rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
          if (rot_amt & -16)
            addr = gen_rtx_CONST (Pmode,
                                  gen_rtx_PLUS (Pmode,
                                                XEXP (XEXP (addr, 0), 0),
                                                GEN_INT (rot_amt & -16)));
          else
            addr = XEXP (XEXP (addr, 0), 0);
        }
      else
        {
          rot = gen_reg_rtx (Pmode);
          emit_move_insn (rot, addr);
        }
    }
  else if (GET_CODE (addr) == CONST_INT)
    {
      rot_amt = INTVAL (addr);
      addr = GEN_INT (rot_amt & -16);
    }
  else if (!ALIGNED_SYMBOL_REF_P (addr))
    {
      rot = gen_reg_rtx (Pmode);
      emit_move_insn (rot, addr);
    }

  rot_amt += extra_rotby;

  rot_amt &= 15;

  if (rot && rot_amt)
    {
      rtx x = gen_reg_rtx (SImode);
      emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
      rot = x;
      rot_amt = 0;
    }
  if (!rot && rot_amt)
    rot = GEN_INT (rot_amt);

  addr0 = copy_rtx (addr);
  addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
  emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));

  if (dst1)
    {
      addr1 = plus_constant (copy_rtx (addr), 16);
      addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
      emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
    }

  return rot;
}
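/* Worked example (illustrative): a 4-byte load from an unaligned
   register address R expands to roughly

       lqd    load, 0(R)      # the hardware ignores the low 4 address bits
       rotqby load, load, R   # rotate left by (R & 15) bytes

   which leaves the four addressed bytes in the preferred slot.  The
   code above only normalizes the address and computes the rotate
   amount; the rotate itself is emitted by the callers.  */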
int
spu_split_load (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx addr, load, rot;
  int rot_amt;

  if (GET_MODE_SIZE (mode) >= 16)
    return 0;

  addr = XEXP (ops[1], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[1]))
    {
      ops[1] = change_address (ops[1], TImode, addr);
      load = gen_reg_rtx (TImode);
      emit_insn (gen__movti (load, ops[1]));
      spu_convert_move (ops[0], load);
      return 1;
    }

  rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;

  load = gen_reg_rtx (TImode);
  rot = spu_expand_load (load, 0, ops[1], rot_amt);

  if (rot)
    emit_insn (gen_rotqby_ti (load, load, rot));

  spu_convert_move (ops[0], load);
  return 1;
}
int
spu_split_store (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx reg;
  rtx addr, p0, p1, p1_lo, smem;
  int aform;
  int scalar;

  if (GET_MODE_SIZE (mode) >= 16)
    return 0;

  addr = XEXP (ops[0], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[0]))
    {
      reg = gen_reg_rtx (TImode);
      emit_insn (gen_spu_convert (reg, ops[1]));
      ops[0] = change_address (ops[0], TImode, addr);
      emit_move_insn (ops[0], reg);
      return 1;
    }

  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
         unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
       */
      aform = 0;
      p0 = XEXP (addr, 0);
      p1 = p1_lo = XEXP (addr, 1);
      if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
        {
          p1_lo = GEN_INT (INTVAL (p1) & 15);
          if (reg_aligned_for_addr (p0))
            {
              p1 = GEN_INT (INTVAL (p1) & -16);
              if (p1 == const0_rtx)
                addr = p0;
              else
                addr = gen_rtx_PLUS (SImode, p0, p1);
            }
          else
            {
              rtx x = gen_reg_rtx (SImode);
              emit_move_insn (x, p1);
              addr = gen_rtx_PLUS (SImode, p0, x);
            }
        }
    }
  else if (REG_P (addr))
    {
      aform = 0;
      p0 = addr;
      p1 = p1_lo = const0_rtx;
    }
  else
    {
      aform = 1;
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
      p1 = 0;                   /* aform doesn't use p1 */
      p1_lo = addr;
      if (ALIGNED_SYMBOL_REF_P (addr))
        p1_lo = const0_rtx;
      else if (GET_CODE (addr) == CONST
               && GET_CODE (XEXP (addr, 0)) == PLUS
               && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
               && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
        {
          HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
          if ((v & -16) != 0)
            addr = gen_rtx_CONST (Pmode,
                                  gen_rtx_PLUS (Pmode,
                                                XEXP (XEXP (addr, 0), 0),
                                                GEN_INT (v & -16)));
          else
            addr = XEXP (XEXP (addr, 0), 0);
          p1_lo = GEN_INT (v & 15);
        }
      else if (GET_CODE (addr) == CONST_INT)
        {
          p1_lo = GEN_INT (INTVAL (addr) & 15);
          addr = GEN_INT (INTVAL (addr) & -16);
        }
      else
        {
          p1_lo = gen_reg_rtx (SImode);
          emit_move_insn (p1_lo, addr);
        }
    }

  gcc_assert (aform == 0 || aform == 1);
  reg = gen_reg_rtx (TImode);

  scalar = store_with_one_insn_p (ops[0]);
  if (!scalar)
    {
      /* We could copy the flags from the ops[0] MEM to mem here,
         We don't because we want this load to be optimized away if
         possible, and copying the flags will prevent that in certain
         cases, e.g. consider the volatile flag.  */

      rtx pat = gen_reg_rtx (TImode);
      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
      set_mem_alias_set (lmem, 0);
      emit_insn (gen_movti (reg, lmem));

      if (!p0 || reg_aligned_for_addr (p0))
        p0 = stack_pointer_rtx;
      if (!p1_lo)
        p1_lo = const0_rtx;

      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
    }
  else
    {
      if (GET_CODE (ops[1]) == REG)
        emit_insn (gen_spu_convert (reg, ops[1]));
      else if (GET_CODE (ops[1]) == SUBREG)
        emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
      else
        abort ();
    }

  if (GET_MODE_SIZE (mode) < 4 && scalar)
    emit_insn (gen_ashlti3
               (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));

  smem = change_address (ops[0], TImode, copy_rtx (addr));
  /* We can't use the previous alias set because the memory has changed
     size and can potentially overlap objects of other types.  */
  set_mem_alias_set (smem, 0);

  emit_insn (gen_movti (smem, reg));
  return 1;
}
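/* Worked example (illustrative): storing an SImode value X to an
   unaligned address A in the non-scalar case above becomes roughly

       lqd   reg, 0(A)            # load the containing quadword
       cwd   pat, 0(A)            # insertion mask for a word at A & 15
       shufb reg, X, reg, pat     # merge X into the quadword
       stqd  reg, 0(A)            # store the quadword back

   gen_cpat produces the same mask the cwd/cbd/chd/cdd instructions
   generate.  */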
/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded.  */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs.  */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     padded too.  */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded.  */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}
/* Parse the -mfixed-range= option string.  */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
        {
          warning (0, "value of -mfixed-range must have form REG1-REG2");
          return;
        }
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
        *comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
        {
          warning (0, "unknown register name: %s", str);
          return;
        }

      last = decode_reg_name (dash + 1);
      if (last < 0)
        {
          warning (0, "unknown register name: %s", dash + 1);
          return;
        }

      *dash = '-';

      if (first > last)
        {
          warning (0, "%s-%s is an empty range", str, dash + 1);
          return;
        }

      for (i = first; i <= last; ++i)
        fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
        break;

      *comma = ',';
      str = comma + 1;
    }
}
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction.  */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
         of an SImode will always be all 1s, i.e., valid for fsmbi.  */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction.  */
int
cpat_const_p (rtx x, enum machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}
rtx
gen_cpat_const (rtx * ops)
{
  unsigned char dst[16];
  int i, offset, shift, isize;
  if (GET_CODE (ops[3]) != CONST_INT
      || GET_CODE (ops[2]) != CONST_INT
      || (GET_CODE (ops[1]) != CONST_INT
          && GET_CODE (ops[1]) != REG))
    return 0;
  if (GET_CODE (ops[1]) == REG
      && (!REG_POINTER (ops[1])
          || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
    return 0;

  for (i = 0; i < 16; i++)
    dst[i] = i + 16;
  isize = INTVAL (ops[3]);
  if (isize == 1)
    shift = 3;
  else if (isize == 2)
    shift = 2;
  else
    shift = 0;
  offset = (INTVAL (ops[2]) +
            (GET_CODE (ops[1]) ==
             CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;
  return array_to_constant (TImode, dst);
}
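/* Worked example: for a 4-byte insert at total offset 4 (so isize == 4
   and shift == 0) the mask built above is

     dst = { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03,
             0x18,0x19,0x1a,0x1b, 0x1c,0x1d,0x1e,0x1f }

   i.e. a shufb pattern that copies the second source unchanged except
   for bytes 4-7, which come from the first source's preferred slot.  */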
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
   than 16 bytes, the value is repeated across the rest of the array.  */
void
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
{
  HOST_WIDE_INT val;
  int i, j, first;

  memset (arr, 0, 16);
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
  if (GET_CODE (x) == CONST_INT
      || (GET_CODE (x) == CONST_DOUBLE
          && (mode == SFmode || mode == DFmode)))
    {
      gcc_assert (mode != VOIDmode && mode != BLKmode);

      if (GET_CODE (x) == CONST_DOUBLE)
        val = const_double_to_hwint (x);
      else
        val = INTVAL (x);
      first = GET_MODE_SIZE (mode) - 1;
      for (i = first; i >= 0; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
      /* Splat the constant across the whole array.  */
      for (j = 0, i = first + 1; i < 16; i++)
        {
          arr[i] = arr[j];
          j = (j == first) ? 0 : j + 1;
        }
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    {
      val = CONST_DOUBLE_LOW (x);
      for (i = 15; i >= 8; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
      val = CONST_DOUBLE_HIGH (x);
      for (i = 7; i >= 0; i--)
        {
          arr[i] = val & 0xff;
          val >>= 8;
        }
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    {
      int units;
      rtx elt;
      mode = GET_MODE_INNER (mode);
      units = CONST_VECTOR_NUNITS (x);
      for (i = 0; i < units; i++)
        {
          elt = CONST_VECTOR_ELT (x, i);
          if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
            {
              if (GET_CODE (elt) == CONST_DOUBLE)
                val = const_double_to_hwint (elt);
              else
                val = INTVAL (elt);
              first = GET_MODE_SIZE (mode) - 1;
              if (first + i * GET_MODE_SIZE (mode) > 16)
                abort ();
              for (j = first; j >= 0; j--)
                {
                  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
                  val >>= 8;
                }
            }
        }
    }
  else
    gcc_unreachable ();
}
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3].  */
rtx
array_to_constant (enum machine_mode mode, const unsigned char arr[16])
{
  enum machine_mode inner_mode;
  rtvec v;
  int units, size, i, j, k;
  HOST_WIDE_INT val;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
        val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);
    }

  if (mode == TImode)
    {
      HOST_WIDE_INT high;
      for (i = high = 0; i < 8; i++)
        high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
        val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);
    }
  if (mode == SFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);
    }
  if (mode == DFmode)
    {
      for (i = 0, val = 0; i < 8; i++)
        val = (val << 8) | arr[i];
      return hwint_to_const_double (DFmode, val);
    }

  if (!VECTOR_MODE_P (mode))
    abort ();

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
    {
      val = 0;
      for (j = 0; j < size; j++, k++)
        val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
        RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
      else
        RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
    }

  return gen_rtx_CONST_VECTOR (mode, v);
}
static void
reloc_diagnostic (rtx x)
{
  tree decl = 0;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
           && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL.  */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* The decl could be a string constant.  */
  if (decl && DECL_P (decl))
    {
      location_t loc;
      /* We use last_assemble_variable_decl to get line information.  It's
         not always going to be right and might not even be close, but will
         be right for the more common cases.  */
      if (!last_assemble_variable_decl || in_section == ctors_section)
        loc = DECL_SOURCE_LOCATION (decl);
      else
        loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);

      if (TARGET_WARN_RELOC)
        warning_at (loc, 0,
                    "creating run-time relocation for %qD", decl);
      else
        error_at (loc,
                  "creating run-time relocation for %qD", decl);
    }
  else
    {
      if (TARGET_WARN_RELOC)
        warning_at (input_location, 0, "creating run-time relocation");
      else
        error_at (input_location, "creating run-time relocation");
    }
}
/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them.  */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't.  */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
               bool speed ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return spu_legitimate_constant_p (mode, x) ? cost : COSTS_N_INSNS (6);
   */

  /* Use defaults for float operations.  Not accurate but good enough.  */
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
        *total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
        *total = COSTS_N_INSNS (1);
      else
        *total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
        {
          *total = COSTS_N_INSNS (9);
          return true;
        }
      break;

    case MULT:
      cost =
        GET_CODE (XEXP (x, 0)) ==
        REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
        {
          if (GET_CODE (XEXP (x, 1)) == CONST_INT)
            {
              HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
              cost = COSTS_N_INSNS (14);
              if ((val & 0xffff) == 0)
                cost = COSTS_N_INSNS (9);
              else if (val > 0 && val < 0x10000)
                cost = COSTS_N_INSNS (11);
            }
        }
      break;
    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;
    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;
    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
        *total = COSTS_N_INSNS (0);
      else
        *total = COSTS_N_INSNS (4);
      return true;
    default:
      break;
    }
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0).  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
  *total = cost;
  return true;
}
static enum machine_mode
spu_unwind_word_mode (void)
{
  return SImode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.)  */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again.  */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
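/* The shuffle constant 0x0001020300010203... above copies word 0 of
   op1 into every slot, so a single subv4si3 decrements both the stack
   pointer in slot 0 and the Available Stack Size in slot 1 by the
   allocation size at once.  */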
static void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (addv_optab, SImode, "__addvsi3");
  set_optab_libfunc (subv_optab, SImode, "__subvsi3");
  set_optab_libfunc (smulv_optab, SImode, "__mulvsi3");
  set_optab_libfunc (sdivv_optab, SImode, "__divvsi3");
  set_optab_libfunc (negv_optab, SImode, "__negvsi2");
  set_optab_libfunc (absv_optab, SImode, "__absvsi2");
  set_optab_libfunc (addv_optab, DImode, "__addvdi3");
  set_optab_libfunc (subv_optab, DImode, "__subvdi3");
  set_optab_libfunc (smulv_optab, DImode, "__mulvdi3");
  set_optab_libfunc (sdivv_optab, DImode, "__divvdi3");
  set_optab_libfunc (negv_optab, DImode, "__negvdi2");
  set_optab_libfunc (absv_optab, DImode, "__absvdi2");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want.  */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
          && ((type) == 0
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
              || int_size_in_bytes (type) >
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
/* Create the built-in types and functions */

enum spu_function_code
{
#define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
#include "spu-builtins.def"
#undef DEF_BUILTIN
  NUM_SPU_BUILTINS
};

extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};

static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];

/* Returns the spu builtin decl for CODE.  */
static tree
spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
{
  if (code >= NUM_SPU_BUILTINS)
    return error_mark_node;

  return spu_builtin_decls[code];
}
static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
                        (void_type_node,
                         TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared.  */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];            /* build_function will make a copy.  */
      int parm;

      if (d->name == 0)
        continue;

      /* Find last parm.  */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
        ;

      p = void_list_node;
      while (parm > 1)
        p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      spu_builtin_decls[i] =
        add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
        TREE_READONLY (spu_builtin_decls[i]) = 1;

      /* These builtins don't throw.  */
      TREE_NOTHROW (spu_builtin_decls[i]) = 1;
    }
}
static void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}

int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
          && GET_CODE (ops[1]) != SUBREG)
        ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
        {
        case V2DImode:
        case V2DFmode:
          shuf =
            immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
                                TImode);
          break;
        case V4SImode:
        case V4SFmode:
          shuf =
            immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
                                TImode);
          break;
        case V8HImode:
          shuf =
            immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
                                TImode);
          break;
        case V16QImode:
          shuf =
            immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
                                TImode);
          break;
        default:
          abort ();
        }
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
          break;
        case V4SFmode:
          emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
          break;
        case V4SImode:
          emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
          break;
        case V2DImode:
          emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
          break;
        case V2DFmode:
          emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
          break;
        default:
          abort ();
        }
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }
  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
                 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
             (mask, stack_pointer_rtx, offset,
              GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
        pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
        {
        case V16QImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
          break;
        case V8HImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
          emit_insn (gen_addsi3 (offset, offset, offset));
          break;
        case V4SFmode:
        case V4SImode:
          emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
          emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
          break;
        case V2DImode:
        case V2DFmode:
          emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
          break;
        default:
          abort ();
        }
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
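/* Worked example: promoting element 2 of a V4SI vector computes
   pos = -4 * 2 = -8 and offset = -8 & 15 = 8, so rotqby by 8 bytes
   moves bytes 8-11 (element 2) into bytes 0-3, the SImode preferred
   slot.  */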
static void
spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
{
  rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      static unsigned char const shufa[16] = {
        2, 3, 0, 1, 18, 19, 16, 17,
        0, 1, 2, 3, 16, 17, 18, 19
      };
      static unsigned char const insna[16] = {
        0x41, 0, 0, 79,
        0x41, 0, 0, STATIC_CHAIN_REGNUM,
        0x60, 0x80, 0, 79,
        0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = adjust_address (m_tramp, Pmode, 16);
      emit_move_insn (mem, bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      static unsigned char const insna[16] = {
        0x42, 0, 0, STATIC_CHAIN_REGNUM,
        0x30, 0, 0, 0,
        0, 0, 0, 0,
        0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
         fits 18 bits and the last 4 are zeros.  This will be true if
         the stack pointer is initialized to 0x3fff0 at program start,
         otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
                 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = adjust_address (m_tramp, V4SImode, 0);
      emit_move_insn (mem, insn);
    }
  emit_insn (gen_sync ());
}
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;
  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
        arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
        arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
        {
        case HImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_extendhisi2 (sign, ops[1]));
          arr[last] = 0x03;
          arr[last - 1] = 0x02;
          break;
        case SImode:
          sign = gen_reg_rtx (SImode);
          emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
          for (i = 0; i < 4; i++)
            arr[last - i] = 3 - i;
          break;
        case DImode:
          sign = gen_reg_rtx (SImode);
          c = gen_reg_rtx (SImode);
          emit_insn (gen_spu_convert (c, ops[1]));
          emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
          for (i = 0; i < 8; i++)
            arr[last - i] = 7 - i;
          break;
        default:
          abort ();
        }
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
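/* Worked example: sign extending SImode to DImode (last == 7) builds
   the pattern { 0x10,0x10,0x10,0x10, 0x00,0x01,0x02,0x03, 0x10,... },
   so shufb places four copies of the sign byte ahead of the original
   word: result bytes 0-3 come from the sign register and bytes 4-7
   from ops[1].  */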
/* expand vector initialization. If there are any constant parts,
   load constant parts first. Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!(CONST_INT_P (x)
            || GET_CODE (x) == CONST_DOUBLE
            || GET_CODE (x) == CONST_FIXED))
        ++n_var;
      else
        {
          if (first_constant == NULL_RTX)
            first_constant = x;
        }
      if (i > 0 && !rtx_equal_p (x, first))
        all_same = false;
    }

  /* if all elements are the same, use splats to repeat elements */
  if (all_same)
    {
      if (!CONSTANT_P (first)
          && !register_operand (first, GET_MODE (x)))
        first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* load constant parts */
  if (n_var != n_elts)
    {
      if (n_var == 0)
        {
          emit_move_insn (target,
                          gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
        }
      else
        {
          rtx constant_parts_rtx = copy_rtx (vals);

          gcc_assert (first_constant != NULL_RTX);
          /* fill empty slots with the first constant, this increases
             our chance of using splats in the recursive call below. */
          for (i = 0; i < n_elts; ++i)
            {
              x = XVECEXP (constant_parts_rtx, 0, i);
              if (!(CONST_INT_P (x)
                    || GET_CODE (x) == CONST_DOUBLE
                    || GET_CODE (x) == CONST_FIXED))
                XVECEXP (constant_parts_rtx, 0, i) = first_constant;
            }

          spu_expand_vector_init (target, constant_parts_rtx);
        }
    }

  /* load variable parts */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
        {
          x = XVECEXP (vals, 0, i);
          if (!(CONST_INT_P (x)
                || GET_CODE (x) == CONST_DOUBLE
                || GET_CODE (x) == CONST_FIXED))
            {
              if (!register_operand (x, GET_MODE (x)))
                x = force_reg (GET_MODE (x), x);
              insert_operands[1] = x;
              insert_operands[3] = GEN_INT (i);
              spu_builtin_insert (insert_operands);
            }
        }
    }
}
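/* Example: initializing (vector int){x, 1, 2, 3} with variable x first
   loads the constant vector {1, 1, 2, 3} (the variable slot is padded
   with the first constant to improve the odds of a splat), then uses
   spu_builtin_insert to overwrite slot 0 with x.  */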
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
                  enum machine_mode dest_mode,
                  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */
static rtx
spu_emit_vector_compare (enum rtx_code rcode,
                         rtx op0, rtx op1,
                         enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions use destination V4SImode.
     Double floating point vector compare instructions use destination
     V2DImode.  Move destination to appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
        {
        case LT:
          rcode = GT;
          swap_operands = true;
          try_again = true;
          break;
        case LTU:
          rcode = GTU;
          swap_operands = true;
          try_again = true;
          break;
        case NE:
          /* Treat A != B as ~(A==B).  */
          {
            enum insn_code nor_code;
            rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
            nor_code = optab_handler (one_cmpl_optab, dest_mode);
            gcc_assert (nor_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        case GE:
        case GEU:
        case LE:
        case LEU:
          /* Try GT/GTU/LT/LTU OR EQ */
          {
            rtx c_rtx, eq_rtx;
            enum insn_code ior_code;
            enum rtx_code new_code;

            switch (rcode)
              {
              case GE:  new_code = GT;  break;
              case GEU: new_code = GTU; break;
              case LE:  new_code = LT;  break;
              case LEU: new_code = LTU; break;
              default:
                gcc_unreachable ();
              }

            c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
            eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

            ior_code = optab_handler (ior_optab, dest_mode);
            gcc_assert (ior_code != CODE_FOR_nothing);
            emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
            if (dmode != dest_mode)
              {
                rtx temp = gen_reg_rtx (dest_mode);
                convert_move (temp, mask, 0);
                return temp;
              }
            return mask;
          }
          break;
        default:
          gcc_unreachable ();
        }

      /* You only get two chances.  */
      if (try_again)
        vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
        {
          rtx tmp;
          tmp = op0;
          op0 = op1;
          op1 = tmp;
        }
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
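      return temp;
    }
  return mask;
}

/* Example of the fallback logic above: V4SI "a <= b" has no direct
   instruction, so it is emitted as (a < b) | (a == b), and "a < b"
   itself is retried as "b > a" with the operands swapped.  */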
/* Emit vector conditional expression.
   DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
                           rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return 1;
}
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
          || GET_MODE (op) == BLKmode)
        return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
        return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;
  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
        error ("%s expects an integer literal in the range [%d, %d]",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
          && (GET_CODE (XEXP (op, 0)) == PLUS
              || GET_CODE (XEXP (op, 0)) == MINUS))
        {
          v = INTVAL (XEXP (XEXP (op, 0), 1));
          op = XEXP (XEXP (op, 0), 0);
        }
      else if (GET_CODE (op) == CONST_INT)
        v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
               && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
        v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range.  */
      if (v < spu_builtin_range[range].low
          || v > spu_builtin_range[range].high)
        error ("%s expects an integer literal in the range [%d, %d]. (%wd)",
               d->name,
               spu_builtin_range[range].low, spu_builtin_range[range].high,
               v);

      switch (p)
        {
        case SPU_BTI_S10_4:
          lsbits = 4;
          break;
        case SPU_BTI_U16_2:
          /* This is only used in lqa, and stqa.  Even though the insns
             encode 16 bits of the address (all but the 2 least
             significant), only 14 bits are used because it is masked to
             be 16 byte aligned.  */
          lsbits = 4;
          break;
        case SPU_BTI_S16_2:
          /* This is used for lqr and stqr.  */
          lsbits = 2;
          break;
        default:
          lsbits = 0;
        }

      if (GET_CODE (op) == LABEL_REF
          || (GET_CODE (op) == SYMBOL_REF
              && SYMBOL_REF_FUNCTION_P (op))
          || (v & ((1 << lsbits) - 1)) != 0)
        warning (0, "%d least significant bits of %s are ignored", lsbits,
                 d->name);
    }
}
static int
expand_builtin_args (struct spu_builtin_description *d, tree exp,
                     rtx target, rtx ops[])
{
  enum insn_code icode = (enum insn_code) d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl.  */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
    }

  gcc_assert (i == insn_data[icode].n_generator_args);
  return i;
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
                      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = (enum insn_code) d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  int n_operands;
  tree return_type;

  /* Set up ops[] with values from arglist.  */
  n_operands = expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {

      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
        tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
        ops[0] = target;
      else
        target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
        abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
                              gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
        return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
        mode = insn_data[d->icode].operand[i].mode;
      else
        mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
          && (GET_CODE (ops[i]) == CONST_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
              || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
        {
          if (GET_CODE (ops[i]) == CONST_INT)
            ops[i] = spu_const (mode, INTVAL (ops[i]));
          else
            {
              rtx reg = gen_reg_rtx (mode);
              enum machine_mode imode = GET_MODE_INNER (mode);
              if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
                ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
              if (imode != GET_MODE (ops[i]))
                ops[i] = convert_to_mode (imode, ops[i],
                                          TYPE_UNSIGNED (spu_builtin_types
                                                         [d->parm[i]]));
              emit_insn (gen_spu_splats (reg, ops[i]));
              ops[i] = reg;
            }
        }

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
        ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
static rtx
spu_expand_builtin (tree exp,
                    rtx target,
                    rtx subtarget ATTRIBUTE_UNUSED,
                    enum machine_mode mode ATTRIBUTE_UNUSED,
                    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}
/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtin_decls[SPU_MULE_0];
      else
        return spu_builtin_decls[SPU_MULE_1];
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtin_decls[SPU_MULO_1];
      else
        return spu_builtin_decls[SPU_MULO_0];
    default:
      return NULL_TREE;
    }
}

/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  return spu_builtin_decls[SPU_MASK_FOR_LOAD];
}

/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
                                tree vectype ATTRIBUTE_UNUSED,
                                int misalign ATTRIBUTE_UNUSED)
{
  switch (type_of_cost)
    {
    case scalar_stmt:
    case vector_stmt:
    case vector_load:
    case vector_store:
    case vec_to_scalar:
    case scalar_to_vec:
    case cond_branch_not_taken:
    case vec_perm:
      return 1;

    case scalar_store:
      return 10;

    case scalar_load:
      /* Load + rotate.  */
      return 2;

    case unaligned_load:
      return 2;

    case cond_branch_taken:
      return 6;

    default:
      gcc_unreachable ();
    }
}

/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N number of iterations.  This routine does not determine
   how many iterations are required to reach desired alignment.  */
static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}

/* Implement targetm.vectorize.builtin_vec_perm.  */
static tree
spu_builtin_vec_perm (tree type, tree *mask_element_type)
{
  *mask_element_type = unsigned_char_type_node;

  switch (TYPE_MODE (type))
    {
    case V16QImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtin_decls[SPU_SHUFFLE_0];
      else
        return spu_builtin_decls[SPU_SHUFFLE_1];

    case V8HImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtin_decls[SPU_SHUFFLE_2];
      else
        return spu_builtin_decls[SPU_SHUFFLE_3];

    case V4SImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtin_decls[SPU_SHUFFLE_4];
      else
        return spu_builtin_decls[SPU_SHUFFLE_5];

    case V2DImode:
      if (TYPE_UNSIGNED (type))
        return spu_builtin_decls[SPU_SHUFFLE_6];
      else
        return spu_builtin_decls[SPU_SHUFFLE_7];

    case V4SFmode:
      return spu_builtin_decls[SPU_SHUFFLE_8];

    case V2DFmode:
      return spu_builtin_decls[SPU_SHUFFLE_9];

    default:
      return NULL_TREE;
    }
}
6867 /* Return the appropriate mode for a named address pointer. */
6868 static enum machine_mode
6869 spu_addr_space_pointer_mode (addr_space_t addrspace
)
6873 case ADDR_SPACE_GENERIC
:
6882 /* Return the appropriate mode for a named address address. */
6883 static enum machine_mode
6884 spu_addr_space_address_mode (addr_space_t addrspace
)
6888 case ADDR_SPACE_GENERIC
:
/* Determine if one named address space is a subset of another.  */
static bool
spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
{
  gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
  gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);

  if (subset == superset)
    return true;

  /* If we have -mno-address-space-conversion, treat __ea and generic as not
     being subsets but instead as disjoint address spaces.  */
  else if (!TARGET_ADDRESS_SPACE_CONVERSION)
    return false;

  else
    return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
}

/* Convert from one address space to another.  */
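/* A sketch of the intended arithmetic, assuming __ea_local_store holds
   the 64-bit effective address at which this SPU's local store is
   mapped:

       __ea to generic:   p = (e == 0) ? 0 : e - __ea_local_store;
       generic to __ea:   e = (p == 0) ? 0 : p + __ea_local_store;

   The conditional moves below zero the bias when the operand is null,
   so null pointers convert to null pointers.  */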
static rtx
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
{
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));

  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);

  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (Pmode);
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, Pmode, 1);

      emit_insn (gen_subsi3 (result, op, ls));

      return result;
    }

  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (EAmode);
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
      op = force_reg (Pmode, op);
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, EAmode, 1);
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));

      if (EAmode == SImode)
        emit_insn (gen_addsi3 (result, op, ls));
      else
        emit_insn (gen_adddi3 (result, op, ls));

      return result;
    }

  else
    gcc_unreachable ();
}

/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
   -2 means the instruction can go in either pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
        fprintf (dump_file, "i%d %s %d %d\n",
                 g->nodes[i].cuid,
                 insn_data[INSN_CODE(insn)].name,
                 p, t[p]);
    }

  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}

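/* Worked example with illustrative counts: for t = {3, 0, 4, 2},
   i.e. three instructions that can issue on either pipe, four that are
   pipe0 only and two that are pipe1 only, the result is
   MAX ((3 + 4 + 2 + 1) / 2, MAX (4, 2)) = MAX (5, 4) = 5.  */
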
void
spu_init_expanders (void)
{
  if (cfun)
    {
      rtx r0, r1;

      /* The hard frame pointer register is only 128-bit aligned when
         frame_pointer_needed is true.  We don't know that until we're
         expanding the prologue.  */
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;

      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
         to be treated as aligned, so generate them here.  */
      r0 = gen_reg_rtx (SImode);
      r1 = gen_reg_rtx (SImode);
      mark_reg_pointer (r0, 128);
      mark_reg_pointer (r1, 128);
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
                  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
    }
}

static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU, word_mode is TImode, so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU, word_mode is TImode, so it is better to use SImode
     for shift counts.  */
  return SImode;
}

/* An early place to adjust some flags after GCC has finished processing
   them.  */
static void
asm_file_start (void)
{
  default_file_start ();
}

/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  /* Don't load ._ea into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);
}

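/* SECTION_BSS is what gives .toe the @nobits type: the section is
   allocated at run time but takes no space in the object file.
   Marking ._ea with SECTION_DEBUG makes it a non-loadable section,
   matching the comment above: its contents belong to the separate
   __ea address space, not to local store.  */
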
/* Implement targetm.select_section.  */
static section *
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
{
  /* Variables and constants defined in the __ea address space
     go into a special section named "._ea".  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
    {
      /* We might get called with string constants, but get_named_section
         doesn't like them as they are not DECLs.  Also, we need to set
         flags in that case.  */
      if (!DECL_P (decl))
        return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);

      return get_named_section (decl, "._ea", reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

/* Implement targetm.unique_section.  */
static void
spu_unique_section (tree decl, int reloc)
{
  /* We don't support unique section names in the __ea address
     space for now.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
    return;

  default_unique_section (decl, reloc);
}

/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode.  */
rtx
spu_gen_exp2 (enum machine_mode mode, rtx scale)
{
  gcc_assert (mode == V4SFmode);
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
  if (GET_CODE (scale) != CONST_INT)
    {
      /* unsigned int exp = (127 + scale) << 23;
         __vector float m = (__vector float) spu_splats (exp);  */
      rtx reg = force_reg (SImode, scale);
      rtx exp = gen_reg_rtx (SImode);
      rtx mul = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
      return mul;
    }
  else
    {
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
      unsigned char arr[16];

      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
      return array_to_constant (mode, arr);
    }
}

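/* Worked example for the constant path: for SCALE = 1, exp = 128, so
   each word of the V4SF constant is assembled from the bytes
   { exp >> 1, (exp << 7) & 0xff, 0, 0 } = { 0x40, 0x00, 0x00, 0x00 },
   i.e. 0x40000000, the single-precision encoding of 2.0f (sign 0,
   biased exponent 127 + 1, zero mantissa).  */
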
/* After reload, just change the convert into a move instruction
   or a dead instruction.  */
void
spu_split_convert (rtx ops[])
{
  if (REGNO (ops[0]) == REGNO (ops[1]))
    emit_note (NOTE_INSN_DELETED);
  else
    {
      /* Use TImode always as this might help hard reg copyprop.  */
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
      emit_insn (gen_move_insn (op0, op1));
    }
}

void
spu_function_profiler (FILE * file, int labelno ATTRIBUTE_UNUSED)
{
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75, _mcount\n");
}

/* Implement targetm.ref_may_alias_errno.  */
static bool
spu_ref_may_alias_errno (ao_ref *ref)
{
  tree base = ao_ref_base (ref);

  /* With SPU newlib, errno is defined as something like
         _impure_data._errno
     The default implementation of this target macro does not
     recognize such expressions, so we special-case it here.  */
  if (TREE_CODE (base) == VAR_DECL
      && !TREE_STATIC (base)
      && DECL_EXTERNAL (base)
      && TREE_CODE (TREE_TYPE (base)) == RECORD_TYPE
      && strcmp (IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (base)),
                 "_impure_data") == 0
      /* _errno is the first member of _impure_data.  */
      && ref->offset == 0)
    return true;

  return default_ref_may_alias_errno (ref);
}

/* Output thunk to FILE that implements a C++ virtual function call (with
   multiple inheritance) to FUNCTION.  The thunk adjusts the this pointer
   by DELTA, and unless VCALL_OFFSET is zero, applies an additional
   adjustment stored at VCALL_OFFSET in the vtable whose address is
   located at offset 0 relative to the resulting this pointer.  */

static void
spu_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
                     HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                     tree function)
{
  rtx op[8];

  /* Make sure unwind info is emitted for the thunk if needed.  */
  final_start_function (emit_barrier (), file, 1);

  /* Operand 0 is the target function.  */
  op[0] = XEXP (DECL_RTL (function), 0);

  /* Operand 1 is the 'this' pointer.  */
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM + 1);
  else
    op[1] = gen_rtx_REG (Pmode, FIRST_ARG_REGNUM);

  /* Operands 2/3 are the low/high halfwords of delta.  */
  op[2] = GEN_INT (trunc_int_for_mode (delta, HImode));
  op[3] = GEN_INT (trunc_int_for_mode (delta >> 16, HImode));

  /* Operands 4/5 are the low/high halfwords of vcall_offset.  */
  op[4] = GEN_INT (trunc_int_for_mode (vcall_offset, HImode));
  op[5] = GEN_INT (trunc_int_for_mode (vcall_offset >> 16, HImode));

  /* Operands 6/7 are temporary registers.  */
  op[6] = gen_rtx_REG (Pmode, 79);
  op[7] = gen_rtx_REG (Pmode, 78);

  /* Add DELTA to this pointer.  */
  if (delta)
    {
      if (delta >= -0x200 && delta < 0x200)
        output_asm_insn ("ai\t%1,%1,%2", op);
      else if (delta >= -0x8000 && delta < 0x8000)
        {
          output_asm_insn ("il\t%6,%2", op);
          output_asm_insn ("a\t%1,%1,%6", op);
        }
      else
        {
          output_asm_insn ("ilhu\t%6,%3", op);
          output_asm_insn ("iohl\t%6,%2", op);
          output_asm_insn ("a\t%1,%1,%6", op);
        }
    }

  /* Perform vcall adjustment.  */
  if (vcall_offset)
    {
      output_asm_insn ("lqd\t%7,0(%1)", op);
      output_asm_insn ("rotqby\t%7,%7,%1", op);

      if (vcall_offset >= -0x200 && vcall_offset < 0x200)
        output_asm_insn ("ai\t%7,%7,%4", op);
      else if (vcall_offset >= -0x8000 && vcall_offset < 0x8000)
        {
          output_asm_insn ("il\t%6,%4", op);
          output_asm_insn ("a\t%7,%7,%6", op);
        }
      else
        {
          output_asm_insn ("ilhu\t%6,%5", op);
          output_asm_insn ("iohl\t%6,%4", op);
          output_asm_insn ("a\t%7,%7,%6", op);
        }

      output_asm_insn ("lqd\t%6,0(%7)", op);
      output_asm_insn ("rotqby\t%6,%6,%7", op);
      output_asm_insn ("a\t%1,%1,%6", op);
    }

  /* Jump to target.  */
  output_asm_insn ("br\t%0", op);

  final_end_function ();
}
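
/* For illustration, with hypothetical values and assuming $3 is the
   first argument register: a thunk with delta = 8 and vcall_offset = 0
   reduces to

       ai      $3,$3,8
       br      function

   since 8 fits in the 10-bit signed immediate of ai.  Larger deltas
   are built in the temporary register with il, or with an ilhu/iohl
   pair, and then added.  */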