/* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "diagnostic-core.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tm-constrs.h"
/* Builtin types, data and prototypes. */

enum spu_builtin_type_index
{
  SPU_BTI_END_OF_PARAMS,

  /* We create new type nodes for these. */

  /* A 16-byte type.  (Implemented with V16QI_type_node) */

  /* These all correspond to intSI_type_node.  */

  /* These correspond to the standard types.  */

  SPU_BTI_MAX
};

#define V16QI_type_node          (spu_builtin_types[SPU_BTI_V16QI])
#define V8HI_type_node           (spu_builtin_types[SPU_BTI_V8HI])
#define V4SI_type_node           (spu_builtin_types[SPU_BTI_V4SI])
#define V2DI_type_node           (spu_builtin_types[SPU_BTI_V2DI])
#define V4SF_type_node           (spu_builtin_types[SPU_BTI_V4SF])
#define V2DF_type_node           (spu_builtin_types[SPU_BTI_V2DF])
#define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
#define unsigned_V8HI_type_node  (spu_builtin_types[SPU_BTI_UV8HI])
#define unsigned_V4SI_type_node  (spu_builtin_types[SPU_BTI_UV4SI])
#define unsigned_V2DI_type_node  (spu_builtin_types[SPU_BTI_UV2DI])

static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
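
/* Reading straight off the table above: for example, SPU_BTI_U7 accepts
   the unsigned 7-bit range [0, 127] and SPU_BTI_S10 the signed 10-bit
   range [-512, 511].  Builtin immediate operands are checked against
   these bounds.  */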
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs.  */
static void spu_option_override (void);
static void spu_option_init_struct (struct gcc_options *opts);
static void spu_option_default_params (void);
static void spu_init_builtins (void);
static tree spu_builtin_decl (unsigned, bool);
static bool spu_scalar_mode_supported_p (enum machine_mode mode);
static bool spu_vector_mode_supported_p (enum machine_mode mode);
static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
						 bool, addr_space_t);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance, sbitmap blocks);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
				    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static void spu_sched_init_global (FILE *, int, int);
static void spu_sched_init (FILE *, int, int);
static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags, bool *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags, bool *no_add_attrs);
static int spu_naked_function_p (tree func);
static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
				   const_tree type, bool named);
static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
			     const_tree type, bool named);
static void spu_function_arg_advance (CUMULATIVE_ARGS *cum,
				      enum machine_mode mode,
				      const_tree type, bool named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type,
				      gimple_seq * pre_p, gimple_seq * post_p);
static int store_with_one_insn_p (rtx mem);
static int mem_is_padded_component_ref (rtx x);
static int reg_aligned_for_addr (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static bool spu_rtx_costs (rtx x, int code, int outer_code,
			   int *total, bool speed);
static bool spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
					      addr_space_t);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
static bool spu_vector_alignment_reachable (const_tree, bool);
static tree spu_builtin_vec_perm (tree, tree *);
static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
static enum machine_mode spu_addr_space_address_mode (addr_space_t);
static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
static rtx spu_addr_space_convert (rtx, tree, tree);
static int spu_sms_res_mii (struct ddg *g);
static void asm_file_start (void);
static unsigned int spu_section_type_flags (tree, const char *, int);
static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
static void spu_unique_section (tree, int);
static rtx spu_expand_load (rtx, rtx, rtx, int);
static void spu_trampoline_init (rtx, tree, rtx);
/* Which instruction set architecture to use.  */

/* Which cpu are we tuning for.  */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint, and then the compiler
   will insert enough nops to make it at least 8 insns.  The default is
   for the compiler to allow up to 2 nops to be emitted.  The nops are
   inserted in pairs, so we round down.  */
int spu_hint_dist = (8*4) - (2*4);
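
/* A worked example of the default above: the hint must be 8 insns
   (8*4 == 32 bytes) ahead of the branch, and up to 2 nops (2*4 == 8
   bytes) of padding may be emitted, so the compiler only needs to see
   32 - 8 == 24 bytes of real instructions before inserting a hint.  */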
/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int spu_flag_var_tracking;
enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

static enum machine_mode spu_unwind_word_mode (void);

static enum machine_mode
spu_libgcc_cmp_return_mode (void);

static enum machine_mode
spu_libgcc_shift_count_mode (void);

/* Pointer mode for __ea references. */
#define EAmode (spu_ea_model != 32 ? DImode : SImode)
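
/* For example, under the 64-bit __ea model (spu_ea_model == 64, i.e. the
   -mea64 option) EAmode evaluates to DImode; with -mea32 it is SImode.  */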
/* Table of machine attributes.  */
static const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",          0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector",     0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,             0, 0, false, false, false, NULL }
};
/*  TARGET overrides.  */

#undef TARGET_ADDR_SPACE_POINTER_MODE
#define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode

#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode

#undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
#define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
  spu_addr_space_legitimate_address_p

#undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
#define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address

#undef TARGET_ADDR_SPACE_SUBSET_P
#define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p

#undef TARGET_ADDR_SPACE_CONVERT
#define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins
#undef TARGET_BUILTIN_DECL
#define TARGET_BUILTIN_DECL spu_builtin_decl

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_UNWIND_WORD_MODE
#define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode

#undef TARGET_LEGITIMIZE_ADDRESS
#define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address

/* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
   and .quad for the debugger.  When it is known that the assembler is fixed,
   these can be removed.  */
#undef TARGET_ASM_UNALIGNED_SI_OP
#define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"

#undef TARGET_ASM_ALIGNED_DI_OP
#define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_bool_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_INIT_GLOBAL
#define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global

#undef TARGET_SCHED_INIT
#define TARGET_SCHED_INIT spu_sched_init

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_REORDER
#define TARGET_SCHED_REORDER spu_sched_reorder

#undef TARGET_SCHED_REORDER2
#define TARGET_SCHED_REORDER2 spu_sched_reorder

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_FUNCTION_ARG
#define TARGET_FUNCTION_ARG spu_function_arg

#undef TARGET_FUNCTION_ARG_ADVANCE
#define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
#define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START asm_file_start

#undef TARGET_SECTION_TYPE_FLAGS
#define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags

#undef TARGET_ASM_SELECT_SECTION
#define TARGET_ASM_SELECT_SECTION spu_select_section

#undef TARGET_ASM_UNIQUE_SECTION
#define TARGET_ASM_UNIQUE_SECTION spu_unique_section

#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p

#undef TARGET_TRAMPOLINE_INIT
#define TARGET_TRAMPOLINE_INIT spu_trampoline_init

#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE spu_option_override

#undef TARGET_OPTION_INIT_STRUCT
#define TARGET_OPTION_INIT_STRUCT spu_option_init_struct

#undef TARGET_OPTION_DEFAULT_PARAMS
#define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params

#undef TARGET_EXCEPT_UNWIND_INFO
#define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info

struct gcc_target targetm = TARGET_INITIALIZER;
static void
spu_option_init_struct (struct gcc_options *opts)
{
  /* With so many registers this is better on by default. */
  opts->x_flag_rename_registers = 1;
}
/* Implement TARGET_OPTION_DEFAULT_PARAMS.  */
static void
spu_option_default_params (void)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params.  */
  set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
}
/* Implement TARGET_OPTION_OVERRIDE.  */
static void
spu_option_override (void)
{
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops)
    maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 1,
			   global_options.x_param_values,
			   global_options_set.x_param_values);

  flag_omit_frame_pointer = 1;

  /* Functions must be 8-byte aligned so we correctly handle dual issue.  */
  if (align_functions < 8)
    align_functions = 8;

  spu_hint_dist = 8*4 - spu_max_nops*4;
  if (spu_hint_dist < 0)
    spu_hint_dist = 0;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_arch_string[0]);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_tune_string[0]);
    }

  /* Change defaults according to the processor architecture.  */
  if (spu_arch == PROCESSOR_CELLEDP)
    {
      /* If no command line option has been otherwise specified, change
	 the default to -mno-safe-hints on celledp -- only the original
	 Cell/B.E. processors require this workaround.  */
      if (!(target_flags_explicit & MASK_SAFE_HINTS))
	target_flags &= ~MASK_SAFE_HINTS;
    }

  REAL_MODE_FORMAT (SFmode) = &spu_single_format;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static bool
spu_scalar_mode_supported_p (enum machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static bool
spu_vector_mode_supported_p (enum machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
	|| (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
}
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;

  /* Strip any paradoxical SUBREG.  */
  if (GET_CODE (op) == SUBREG
      && (GET_MODE_BITSIZE (GET_MODE (op))
	  > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }

  /* If it is smaller than SI, assure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  rtx dst = ops[0], src = ops[1];
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT align_mask;
  rtx s0, s1, mask, r0;

  gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);

  if (GET_CODE (src) == MEM)
    {
      /* First, determine if we need 1 TImode load or 2.  We need only 1
         if the bits being extracted do not cross the alignment boundary
         as determined by the MEM and its address. */

      align_mask = -MEM_ALIGN (src);
      if ((start & align_mask) == ((start + width - 1) & align_mask))
	{
	  /* Alignment is sufficient for 1 load. */
	  s0 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, 0, src, start / 8);
	  if (r0)
	    emit_insn (gen_rotqby_ti (s0, s0, r0));
	}
      else
	{
	  s0 = gen_reg_rtx (TImode);
	  s1 = gen_reg_rtx (TImode);
	  r0 = spu_expand_load (s0, s1, src, start / 8);

	  gcc_assert (start + width <= 128);
	  if (r0)
	    {
	      rtx r1 = gen_reg_rtx (SImode);
	      mask = gen_reg_rtx (TImode);
	      emit_move_insn (mask, GEN_INT (-1));
	      emit_insn (gen_rotqby_ti (s0, s0, r0));
	      emit_insn (gen_rotqby_ti (s1, s1, r0));
	      if (GET_CODE (r0) == CONST_INT)
		r1 = GEN_INT (INTVAL (r0) & 15);
	      else
		emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
	      emit_insn (gen_shlqby_ti (mask, mask, r1));
	      emit_insn (gen_selb (s0, s1, s0, mask));
	    }
	}
    }
  else if (GET_CODE (src) == SUBREG)
    {
      rtx r = SUBREG_REG (src);
      gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
      s0 = gen_reg_rtx (TImode);
      if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
	emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
      else
	emit_move_insn (s0, src);
    }
  else
    {
      gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
      s0 = gen_reg_rtx (TImode);
      emit_move_insn (s0, src);
    }

  /* Now s0 is TImode and contains the bits to extract at start. */

  if (start)
    emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));

  if (width < 128)
    {
      tree c = build_int_cst (NULL_TREE, 128 - width);
      s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
    }

  emit_move_insn (dst, s0);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG. */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    switch (dst_size)
      {
      case 32:
	emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
	break;
      case 64:
	emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
	break;
      case 128:
	emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
	break;
      }

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
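      /* A worked example of the mask above: with start == 4 and width == 8
         in a 32-bit destination, maskbits = (-1ll << 20) + (1ll << 28)
         == 0x0ff00000, i.e. an 8-bit field beginning 4 bits below the
         most significant bit.  */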
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    }

  if (GET_CODE (ops[0]) == MEM)
    {
      rtx low = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx addr;
      rtx addr0;
      rtx mem;

      addr = force_reg (Pmode, XEXP (ops[0], 0));
      addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, addr0);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;
	  rtx addr1 = plus_constant (addr, 16);
	  addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = change_address (ops[0], TImode, addr1);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
      emit_move_insn (mem, dst);
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  int i;
  rtx src, dst, sreg, dreg, target;

  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (align == 16)
    {
      for (offset = 0; offset + 16 <= bytes; offset += 16)
	{
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  emit_move_insn (dst, src);
	}
      if (bytes > offset)
	{
	  rtx mask;
	  unsigned char arr[16] = { 0 };
	  for (i = 0; i < bytes - offset; i++)
	    arr[i] = 0xff;
	  dst = adjust_address (ops[0], V16QImode, offset);
	  src = adjust_address (ops[1], V16QImode, offset);
	  mask = gen_reg_rtx (V16QImode);
	  sreg = gen_reg_rtx (V16QImode);
	  dreg = gen_reg_rtx (V16QImode);
	  target = gen_reg_rtx (V16QImode);
	  emit_move_insn (mask, array_to_constant (V16QImode, arr));
	  emit_move_insn (dreg, dst);
	  emit_move_insn (sreg, src);
	  emit_insn (gen_selb (target, dreg, sreg, mask));
	  emit_move_insn (dst, target);
	}
      return 1;
    }
  return 0;
}
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };
int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
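
/* For example, the SImode row above pairs CODE_FOR_ceq_si /
   CODE_FOR_cgt_si / CODE_FOR_clgt_si with SPU_EQ / SPU_GT / SPU_GTU;
   the 0 entries mark comparisons that have no unsigned variant
   (SF, DF, V4SF and V2DF).  */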
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.   GCC can figure this out too if we don't
   provide all variations of compares, but GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  int eq_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code;
  enum insn_code ior_code;
  enum rtx_code code = GET_CODE (cmp);
  rtx op0 = XEXP (cmp, 0);
  rtx op1 = XEXP (cmp, 1);
  int index;

  /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1. */
  if (GET_CODE (op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
	switch (code)
	  {
	  case GE:
	    op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  op_mode = GET_MODE (op0);

  if (HONOR_NANS (op_mode))
    reverse_compare = 0;
  reverse_compare = 1;

  if (HONOR_NANS (op_mode))
    reverse_compare = 1;
  reverse_compare = 0;

  reverse_compare = 1;
  reverse_compare = 1;
  reverse_compare = 0;
  reverse_compare = 1;
  reverse_compare = 0;

  comp_mode = op_mode;
  comp_mode = op_mode;
  comp_mode = op_mode;
  comp_mode = V4SImode;
  comp_mode = V2DImode;

  if (GET_MODE (op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && op1 == const0_rtx
      && (GET_MODE (op0) == SImode
	  || GET_MODE (op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching. */
      reverse_test = !reverse_test;
      compare_result = op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = op1;
	  op1 = op0;
	  op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (op0, op_mode))
	op0 = force_reg (op_mode, op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (op1, op_mode))
	op1 = force_reg (op_mode, op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 op0, op1);
      emit_insn (comp_rtx);

      if (eq_test)
	{
	  eq_result = gen_reg_rtx (comp_mode);
	  eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							     op0, op1);
	  emit_insn (eq_rtx);
	  ior_code = optab_handler (ior_optab, comp_mode);
	  gcc_assert (ior_code != CODE_FOR_nothing);
	  emit_insn (GEN_FCN (ior_code)
		     (compare_result, compare_result, eq_result));
	}
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result. */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      rtx target = operands[0];
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result. */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      rtx target = operands[0];
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
static HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;
  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  return val;
}
static rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  REAL_VALUE_TYPE rv;
  long tv[2];

  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
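
/* For instance, hwint_to_const_double (SFmode, 0x3f800000) recreates the
   SFmode constant 1.0f from its IEEE-754 bit pattern, while
   const_double_to_hwint performs the inverse mapping.  */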
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);

  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      break;

    default:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;

  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }

  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	      fprintf (file, "h");
	      fprintf (file, "b");
	    }
	}
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	      val = trunc_int_for_mode (val, HImode);
	      val = trunc_int_for_mode (val, QImode);
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
		fprintf (file, "a");
		fprintf (file, "h");
		fprintf (file, "hu");
	      }

	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	    fprintf (file, "b");
	    fprintf (file, "h");
	    fprintf (file, "w");
	    fprintf (file, "d");

	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");
	  }
      }
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
	      }
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);

	    constant_to_array (mode, x, arr);
	    for (i = 0; i < 16; i++)
	      ;
	    print_operand (file, GEN_INT (val), 0);

	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);

	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	    fprintf (file, "@h");
	  }
      }
      return;

      if (xcode == CONST_INT)
	{
	  /* Only 4 least significant bits are relevant for generate
	     control word instructions. */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	}
      return;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "b");
	  fprintf (file, "h");
	  fprintf (file, "w");
	  fprintf (file, "d");
	}
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (GET_CODE (XEXP (x, 0)) == REG)
	/* Used in indirect function calls. */
	fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
      else
	output_address (XEXP (x, 0));
      return;

    case 'p':			/* load/store */
      xcode = GET_CODE (x);
      xcode = GET_CODE (x);
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

      constant_to_array (mode, x, arr);
      val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
      output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
      return;

      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
    pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction.  */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, temp, hi, lo;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   IOR might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arrhi);
	to = simplify_gen_subreg (imode, ops[0], mode, 0);
	temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (imode, arrhi);
	lo = array_to_constant (imode, arrlo);
	emit_move_insn (temp, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	 * AND might not be a legitimate real constant. */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      crtl->uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    default:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || crtl->uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf
	  || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}
/* This function is only correct starting with local register
   allocation.  */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
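
/* For example, a function that must save three registers reports
   3 * 0x10 == 48 bytes here, because every saved register occupies a
   full 16-byte (quadword) slot.  */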
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}
/* This happens after reload, so we need to expand it.  */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/*
   The stack frame looks like this:

   AP ->      +-------------+
   prev SP    | back chain  |
              +-------------+
              |  reg save   |  crtl->args.pretend_args_size bytes
              +-------------+
              | saved regs  |  spu_saved_regs_size() bytes
   FP ->      +-------------+
              |    vars     |  get_frame_size() bytes
   HFP ->     +-------------+
              |    args     |  crtl->outgoing_args_size bytes
   SP ->      +-------------+
*/
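
/* As a concrete example of the layout above: for a non-leaf function
   with 32 bytes of locals and two saved registers, the prologue below
   computes total_size = 32 + 2*16 + crtl->outgoing_args_size
   + crtl->args.pretend_args_size, plus STACK_POINTER_OFFSET because the
   function is not a leaf.  */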
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  if (flag_pic && optimize == 0)
    crtl->uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register. */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  offset = -crtl->args.pretend_args_size;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
    if (need_to_save_reg (regno, 1))
      {
	offset -= 0x10;
	insn = frame_emit_store (regno, sp_reg, offset);
	RTX_FRAME_RELATED_P (insn) = 1;
      }

  if (flag_pic && crtl->uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (flag_stack_check)
    {
      /* We compare against total_size-1 because
         ($sp >= total_size) <=> ($sp > total_size-1)  */
      rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
      rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
      rtx size_v4si = spu_const (V4SImode, total_size - 1);
      if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	{
	  emit_move_insn (scratch_v4si, size_v4si);
	  size_v4si = scratch_v4si;
	}
      emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
      emit_insn (gen_vec_extractv4si
		 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
      emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
    }

  /* Adjust the stack pointer, and make sure scratch_reg_0 contains
     the value of the previous $sp because we save it as the back
     chain.  */
  if (total_size <= 2000)
    {
      /* In this case we save the back chain first. */
      insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
      insn =
	frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
    }
  else
    {
      insn = emit_move_insn (scratch_reg_0, sp_reg);
      insn =
	frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
    }
  RTX_FRAME_RELATED_P (insn) = 1;
  real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
  add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);

  if (total_size > 2000)
    {
      /* Save the back chain ptr */
      insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
    }

  if (frame_pointer_needed)
    {
      rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
      HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	+ crtl->outgoing_args_size;
      /* Set the new frame_pointer */
      insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
      add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
    }
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient. */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits. */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
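
/* For instance, spu_const (V4SImode, -1) builds the CONST_VECTOR
   { -1, -1, -1, -1 }, and spu_const (SFmode, 0x3f800000) yields the
   SFmode constant whose bit pattern is 0x3f800000 (1.0f).  */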
/* Create a MODE vector constant from 4 ints. */
rtx
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant(mode, arr);
}
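
/* Example: spu_const_from_ints (V4SImode, 1, 2, 3, 4) stores the four
   ints into the 16-byte array in big-endian byte order and so produces
   the vector constant { 1, 2, 3, 4 }.  */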
/* branch hint stuff */

/* An array of these is used to propagate hints to predecessor blocks. */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  int bb_index;			/* the original block. */
};
static struct spu_bb_info *spu_bb_info;

#define STOP_HINT_P(INSN) \
  (GET_CODE(INSN) == CALL_INSN \
   || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
   || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)

/* 1 when RTX is a hinted branch or its target.  We keep track of
   what has been hinted so the safe-hint code can test it easily.  */
#define HINTED_P(RTX) \
  (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)

/* 1 when RTX is an insn that must be scheduled on an even boundary. */
#define SCHED_ON_EVEN_P(RTX) \
  (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   unrecognized insns.  */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
    new_insn = emit_insn_after (gen_lnop (), insn);
  else if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
  recog_memoized (new_insn);
}
/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  Also make sure hbrp and hint instructions are at least
   one cycle apart, possibly inserting a nop.  */
static void
pad_bb (void)
{
  rtx insn, next_insn, prev_insn, hbr_insn = 0;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES.  */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops.  */
  length = 0;

  prev_insn = 0;
  insn = get_insns ();
  if (!active_insn_p (insn))
    insn = next_active_insn (insn);
  for (; insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      if (INSN_CODE (insn) == CODE_FOR_iprefetch
	  || INSN_CODE (insn) == CODE_FOR_hbr)
	{
	  if (hbr_insn)
	    {
	      int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
	      int a1 = INSN_ADDRESSES (INSN_UID (insn));
	      if ((a1 - a0 == 8 && GET_MODE (insn) != TImode))
		{
		  prev_insn = emit_insn_before (gen_lnop (), insn);
		  PUT_MODE (prev_insn, GET_MODE (insn));
		  PUT_MODE (insn, TImode);
		}
	    }
	  hbr_insn = insn;
	}
      if (INSN_CODE (insn) == CODE_FOR_blockage)
	{
	  if (GET_MODE (insn) == TImode)
	    PUT_MODE (next_insn, TImode);
	  insn = next_insn;
	  next_insn = next_active_insn (insn);
	}
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
	{
	  if (((addr + length) & 7) != 0)
	    {
	      emit_nop_for_insn (prev_insn);
	      length += 4;
	    }
	}
      else if (GET_MODE (insn) == TImode
	       && ((next_insn && GET_MODE (next_insn) != TImode)
		   || get_attr_type (insn) == TYPE_MULTI0)
	       && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned.  */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}
/* Routines for branch hints. */

static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target,
		      int distance, sbitmap blocks)
{
  rtx branch_label = 0;
  rtx hint;
  rtx insn;
  rtx table;

  if (before == 0 || branch == 0 || target == 0)
    return;

  /* While scheduling we require hints to be no further than 600, so
     we need to enforce that here too */
  if (distance > 600)
    return;

  /* If we have a Basic block note, emit it after the basic block note.  */
  if (NOTE_INSN_BASIC_BLOCK_P (before))
    before = NEXT_INSN (before);

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
  SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);

  hint = emit_insn_before (gen_hbr (branch_label, target), before);
  recog_memoized (hint);
  HINTED_P (branch) = 1;

  if (GET_CODE (target) == LABEL_REF)
    HINTED_P (XEXP (target, 0)) = 1;
  else if (tablejump_p (branch, 0, &table))
    {
      rtvec vec;
      int j;
      if (GET_CODE (PATTERN (table)) == ADDR_VEC)
	vec = XVEC (PATTERN (table), 0);
      else
	vec = XVEC (PATTERN (table), 1);
      for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
	HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
    }

  if (distance >= 588)
    {
      /* Make sure the hint isn't scheduled any earlier than this point,
         which could make it too far for the branch offset to fit */
      recog_memoized (emit_insn_before (gen_blockage (), hint));
    }
  else if (distance <= 8 * 4)
    {
      /* To guarantee at least 8 insns between the hint and branch we
         insert nops.  */
      int d;
      for (d = distance; d < 8 * 4; d += 4)
	{
	  insn =
	    emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
	  recog_memoized (insn);
	}

      /* Make sure any nops inserted aren't scheduled before the hint. */
      recog_memoized (emit_insn_after (gen_blockage (), hint));

      /* Make sure any nops inserted aren't scheduled after the call. */
      if (CALL_P (branch) && distance < 8 * 4)
	recog_memoized (emit_insn_before (gen_blockage (), branch));
    }
}
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target. */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* jump table */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      /* ASM GOTOs. */
      if (extract_asm_operands (PATTERN (branch)) != NULL)
	return NULL;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint. */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL. */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn)
      && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
/* Search up to 32 insns starting at FIRST:
   - at any kind of hinted branch, just return
   - at any unconditional branch in the first 15 insns, just return
   - at a call or indirect branch, after the first 15 insns, force it to
     an even address and return
   - at any unconditional branch, after the first 15 insns, force it to
     an odd address.
   At the end of the search, insert an hbrp within 4 insns of FIRST,
   and an hbrp within 16 instructions of FIRST.  */
static void
insert_hbrp_for_ilb_runout (rtx first)
{
  rtx insn, before_4 = 0, before_16 = 0;
  int addr = 0, length, first_addr = -1;
  int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
  int insert_lnop_after = 0;
  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (INSN_P (insn))
      {
	if (first_addr == -1)
	  first_addr = INSN_ADDRESSES (INSN_UID (insn));
	addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
	length = get_attr_length (insn);

	if (before_4 == 0 && addr + length >= 4 * 4)
	  before_4 = insn;
	/* We test for 14 instructions because the first hbrp will add
	   up to 2 instructions. */
	if (before_16 == 0 && addr + length >= 14 * 4)
	  before_16 = insn;

	if (INSN_CODE (insn) == CODE_FOR_hbr)
	  {
	    /* Make sure an hbrp is at least 2 cycles away from a hint.
	       Insert an lnop after the hbrp when necessary. */
	    if (before_4 == 0 && addr > 0)
	      {
		before_4 = insn;
		insert_lnop_after |= 1;
	      }
	    else if (before_4 && addr <= 4 * 4)
	      insert_lnop_after |= 1;
	    if (before_16 == 0 && addr > 10 * 4)
	      {
		before_16 = insn;
		insert_lnop_after |= 2;
	      }
	    else if (before_16 && addr <= 14 * 4)
	      insert_lnop_after |= 2;
	  }

	if (INSN_CODE (insn) == CODE_FOR_iprefetch)
	  {
	    if (addr < hbrp_addr0)
	      hbrp_addr0 = addr;
	    else if (addr < hbrp_addr1)
	      hbrp_addr1 = addr;
	  }

	if (CALL_P (insn) || JUMP_P (insn))
	  {
	    if (HINTED_P (insn))
	      return;

	    /* Any branch after the first 15 insns should be on an even
	       address to avoid a special case branch.  There might be
	       some nops and/or hbrps inserted, so we test after 10
	       insns. */
	    if (addr > 10 * 4)
	      SCHED_ON_EVEN_P (insn) = 1;
	  }

	if (CALL_P (insn) || tablejump_p (insn, 0, 0))
	  return;

	if (addr + length >= 32 * 4)
	  {
	    gcc_assert (before_4 && before_16);
	    if (hbrp_addr0 > 4 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
		recog_memoized (insn);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_4)));
		PUT_MODE (insn, GET_MODE (before_4));
		PUT_MODE (before_4, TImode);
		if (insert_lnop_after & 1)
		  {
		    insn = emit_insn_before (gen_lnop (), before_4);
		    recog_memoized (insn);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID (before_4)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
		&& hbrp_addr1 > 16 * 4)
	      {
		insn =
		  emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
		recog_memoized (insn);
		INSN_ADDRESSES_NEW (insn,
				    INSN_ADDRESSES (INSN_UID (before_16)));
		PUT_MODE (insn, GET_MODE (before_16));
		PUT_MODE (before_16, TImode);
		if (insert_lnop_after & 2)
		  {
		    insn = emit_insn_before (gen_lnop (), before_16);
		    recog_memoized (insn);
		    INSN_ADDRESSES_NEW (insn,
					INSN_ADDRESSES (INSN_UID
							(before_16)));
		    PUT_MODE (insn, TImode);
		  }
	      }
	    return;
	  }
      }
    else if (BARRIER_P (insn))
      return;
}
/* The SPU might hang when it executes 48 inline instructions after a
   hinted branch jumps to its hinted target.  The beginning of a
   function and the return from a call might have been hinted, and must
   be handled as well.  To prevent a hang we insert 2 hbrps.  The first
   should be within 6 insns of the branch target.  The second should be
   within 22 insns of the branch target.  When determining if hbrps are
   necessary, we look for only 32 inline instructions, because up to
   12 nops and 4 hbrps could be inserted.  Similarly, when inserting
   new hbrps, we insert them within 4 and 16 insns of the target. */
static void
insert_hbrp (void)
{
  rtx insn;
  if (TARGET_SAFE_HINTS)
    {
      shorten_branches (get_insns ());
      /* Insert hbrp at beginning of function */
      insn = next_active_insn (get_insns ());
      if (insn)
	insert_hbrp_for_ilb_runout (insn);
      /* Insert hbrp after hinted targets. */
      for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
	if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
	  insert_hbrp_for_ilb_runout (next_active_insn (insn));
    }
}
static int in_spu_reorg;

/* Insert branch hints.  There are no branch optimizations after this
   pass, so it's safe to set our branch hints now. */
static void
spu_machine_dependent_reorg (void)
{
  sbitmap blocks;
  basic_block bb;
  rtx branch, insn;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, required_dist = 0;
  int i;
  unsigned int j;

  if (!TARGET_BRANCH_HINTS || optimize == 0)
    {
      /* We still do it for unoptimized code because an external
         function might have hinted a call or return. */
      insert_hbrp ();
      pad_bb ();
      return;
    }

  blocks = sbitmap_alloc (last_basic_block);
  sbitmap_zero (blocks);

  in_spu_reorg = 1;
  compute_bb_for_insn ();

  compact_blocks ();

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (n_basic_blocks,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths. */
  shorten_branches (get_insns ());

  for (i = n_basic_blocks - 1; i >= 0; i--)
    {
      bb = BASIC_BLOCK (i);
      branch = 0;
      if (spu_bb_info[i].prop_jump)
	{
	  branch = spu_bb_info[i].prop_jump;
	  branch_target = get_branch_target (branch);
	  branch_addr = INSN_ADDRESSES (INSN_UID (branch));
	  required_dist = spu_hint_dist;
	}
      /* Search from end of a block to beginning.  In this loop, find
         jumps which need a branch and emit them only when:
         - it's an indirect branch and we're at the insn which sets
           the register
         - we're at an insn that will invalidate the hint. e.g., a
           call, another hint insn, inline asm that clobbers $hbr, and
           some inlined operations (divmodsi4).  Don't consider jumps
           because they are only at the end of a block and are
           considered when we are deciding whether to propagate
         - we're getting too far away from the branch.  The hbr insns
           only have a signed 10 bit offset
         We go back as far as possible so the branch will be considered
         for propagation when we get to the beginning of the block. */
      for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
	{
	  if (INSN_P (insn))
	    {
	      insn_addr = INSN_ADDRESSES (INSN_UID (insn));
	      if (branch
		  && ((GET_CODE (branch_target) == REG
		       && set_of (branch_target, insn) != NULL_RTX)
		      || insn_clobbers_hbr (insn)
		      || branch_addr - insn_addr > 600))
		{
		  rtx next = NEXT_INSN (insn);
		  int next_addr = INSN_ADDRESSES (INSN_UID (next));
		  if (insn != BB_END (bb)
		      && branch_addr - next_addr >= required_dist)
		    {
		      if (dump_file)
			fprintf (dump_file,
				 "hint for %i in block %i before %i\n",
				 INSN_UID (branch), bb->index,
				 INSN_UID (next));
		      spu_emit_branch_hint (next, branch, branch_target,
					    branch_addr - next_addr, blocks);
		    }
		  branch = 0;
		}

	      /* JUMP_P will only be true at the end of a block.  When
	         branch is already set it means we've previously decided
	         to propagate a hint for that branch into this block. */
	      if (CALL_P (insn) || (JUMP_P (insn) && !branch))
		{
		  branch = 0;
		  if ((branch_target = get_branch_target (insn)))
		    {
		      branch = insn;
		      branch_addr = insn_addr;
		      required_dist = spu_hint_dist;
		    }
		}
	    }
	  if (insn == BB_HEAD (bb))
	    break;
	}

      if (branch)
	{
	  /* If we haven't emitted a hint for this branch yet, it might
	     be profitable to emit it in one of the predecessor blocks,
	     especially for loops. */
	  rtx bbend;
	  basic_block prev = 0, prop = 0, prev2 = 0;
	  int loop_exit = 0, simple_loop = 0;
	  int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));

	  for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	    if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	      prev = EDGE_PRED (bb, j)->src;
	    else
	      prev2 = EDGE_PRED (bb, j)->src;

	  for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	    if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	      loop_exit = 1;
	    else if (EDGE_SUCC (bb, j)->dest == bb)
	      simple_loop = 1;

	  /* If this branch is a loop exit then propagate to previous
	     fallthru block. This catches the cases when it is a simple
	     loop or when there is an initial branch into the loop. */
	  if (prev && (loop_exit || simple_loop)
	      && prev->loop_depth <= bb->loop_depth)
	    prop = prev;

	  /* If there is only one adjacent predecessor.  Don't propagate
	     outside this loop.  This loop_depth test isn't perfect, but
	     I'm not sure the loop_father member is valid at this point. */
	  else if (prev && single_pred_p (bb)
		   && prev->loop_depth == bb->loop_depth)
	    prop = prev;

	  /* If this is the JOIN block of a simple IF-THEN then
	     propagate the hint to the HEADER block. */
	  else if (prev && prev2
		   && EDGE_COUNT (bb->preds) == 2
		   && EDGE_COUNT (prev->preds) == 1
		   && EDGE_PRED (prev, 0)->src == prev2
		   && prev2->loop_depth == bb->loop_depth
		   && GET_CODE (branch_target) != REG)
	    prop = prev;

	  /* Don't propagate when:
	     - this is a simple loop and the hint would be too far
	     - this is not a simple loop and there are 16 insns in
	       this block already
	     - the predecessor block ends in a branch that will be
	       hinted
	     - the predecessor block ends in an insn that invalidates
	       the hint */
	  if (prop
	      && prop->index >= 0
	      && (bbend = BB_END (prop))
	      && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	      (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	      && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
	    {
	      if (dump_file)
		fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
			 "for %i (loop_exit %i simple_loop %i dist %i)\n",
			 bb->index, prop->index, bb->loop_depth,
			 INSN_UID (branch), loop_exit, simple_loop,
			 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	      spu_bb_info[prop->index].prop_jump = branch;
	      spu_bb_info[prop->index].bb_index = i;
	    }
	  else if (branch_addr - next_addr >= required_dist)
	    {
	      if (dump_file)
		fprintf (dump_file, "hint for %i in block %i before %i\n",
			 INSN_UID (branch), bb->index,
			 INSN_UID (NEXT_INSN (insn)));
	      spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
				    branch_addr - next_addr, blocks);
	    }
	  branch = 0;
	}
    }
  free (spu_bb_info);

  if (!sbitmap_empty_p (blocks))
    find_many_sub_basic_blocks (blocks);

  /* We have to schedule to make sure alignment is ok. */
  FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;

  /* The hints need to be scheduled, so call it again. */
  schedule_insns ();
  df_finish_pass (true);

  insert_hbrp ();

  pad_bb ();

  for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
    if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
      {
	/* Adjust the LABEL_REF in a hint when we have inserted a nop
	   between its branch label and the branch.  We don't move the
	   label because GCC expects it at the beginning of the block. */
	rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
	rtx label_ref = XVECEXP (unspec, 0, 0);
	rtx label = XEXP (label_ref, 0);
	int offset = 0;
	for (branch = NEXT_INSN (label);
	     !JUMP_P (branch) && !CALL_P (branch);
	     branch = NEXT_INSN (branch))
	  if (NONJUMP_INSN_P (branch))
	    offset += get_attr_length (branch);
	if (offset > 0)
	  XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
      }

  if (spu_flag_var_tracking)
    {
      df_analyze ();
      timevar_push (TV_VAR_TRACKING);
      variable_tracking_main ();
      timevar_pop (TV_VAR_TRACKING);
      df_finish_pass (false);
    }

  free_bb_for_insn ();

  in_spu_reorg = 0;
}
/* Insn scheduling routines, primarily for dual issue. */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
uses_ls_unit (rtx insn)
{
  rtx set = single_set (insn);
  if (set != 0
      && (GET_CODE (SET_DEST (set)) == MEM
	  || GET_CODE (SET_SRC (set)) == MEM))
    return 1;
  return 0;
}

static int
get_pipe (rtx insn)
{
  enum attr_type t;
  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_CONVERT:
      return -2;
    case TYPE_MULTI0:
      return -1;

    case TYPE_FX2:
    case TYPE_FX3:
    case TYPE_SPR:
    case TYPE_NOP:
    case TYPE_FXB:
    case TYPE_FPD:
    case TYPE_FP6:
    case TYPE_FP7:
      return 0;

    case TYPE_LNOP:
    case TYPE_SHUF:
    case TYPE_LOAD:
    case TYPE_STORE:
    case TYPE_BR:
    case TYPE_MULTI1:
    case TYPE_HBR:
    case TYPE_IPREFETCH:
      return 1;
    default:
      abort ();
    }
}
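
/* Pipe 0 (even) executes the fixed point and floating point
   instructions; pipe 1 (odd) executes loads, stores, shuffles,
   branches and branch hints.  */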
/* haifa-sched.c has a static variable that keeps track of the current
   cycle.  It is passed to spu_sched_reorder, and we record it here for
   use by spu_sched_variable_issue.  It won't be accurate if the
   scheduler updates its clock_var between the two calls. */
static int clock_var;

/* This is used to keep track of insn alignment.  Set to 0 at the
   beginning of each block and increased by the "length" attr of each
   insn scheduled. */
static int spu_sched_length;

/* Record when we've issued pipe0 and pipe1 insns so we can reorder the
   ready list appropriately in spu_sched_reorder(). */
static int pipe0_clock;
static int pipe1_clock;

static int prev_clock_var;

static int prev_priority;

/* The SPU needs to load the next ilb sometime during the execution of
   the previous ilb.  There is a potential conflict if every cycle has a
   load or store.  To avoid the conflict we make sure the load/store
   unit is free for at least one cycle during the execution of insns in
   the previous ilb. */
static int spu_ls_first;
static int prev_ls_clock;
static void
spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		       int max_ready ATTRIBUTE_UNUSED)
{
  spu_sched_length = 0;
}

static void
spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		int max_ready ATTRIBUTE_UNUSED)
{
  if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
    {
      /* When any block might be at least 8-byte aligned, assume they
         will all be at least 8-byte aligned to make sure dual issue
         works out correctly. */
      spu_sched_length = 0;
    }
  spu_ls_first = INT_MAX;
  clock_var = -1;
  prev_ls_clock = -1;
  pipe0_clock = -1;
  pipe1_clock = -1;
  prev_clock_var = -1;
  prev_priority = -1;
}
static int
spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
			  int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
{
  int len;
  int p;
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || (len = get_attr_length (insn)) == 0)
    return more;

  spu_sched_length += len;

  /* Reset on inline asm */
  if (INSN_CODE (insn) == -1)
    {
      spu_ls_first = INT_MAX;
      pipe0_clock = -1;
      pipe1_clock = -1;
      return 0;
    }
  p = get_pipe (insn);
  if (p == 0)
    pipe0_clock = clock_var;
  else
    pipe1_clock = clock_var;

  if (in_spu_reorg)
    {
      if (clock_var - prev_ls_clock > 1
	  || INSN_CODE (insn) == CODE_FOR_iprefetch)
	spu_ls_first = INT_MAX;
      if (uses_ls_unit (insn))
	{
	  if (spu_ls_first == INT_MAX)
	    spu_ls_first = spu_sched_length;
	  prev_ls_clock = clock_var;
	}

      /* The scheduler hasn't inserted the nop, but we will later on.
         Include those nops in spu_sched_length. */
      if (prev_clock_var == clock_var && (spu_sched_length & 7))
	spu_sched_length += 4;
      prev_clock_var = clock_var;

      /* more is -1 when called from spu_sched_reorder for new insns
         that don't have INSN_PRIORITY */
      if (more >= 0)
	prev_priority = INSN_PRIORITY (insn);
    }

  /* Always try issuing more insns.  spu_sched_reorder will decide
     when the cycle should be advanced. */
  return 1;
}
/* This function is called for both TARGET_SCHED_REORDER and
   TARGET_SCHED_REORDER2. */
static int
spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
		   rtx *ready, int *nreadyp, int clock)
{
  int i, nready = *nreadyp;
  int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
  rtx insn;

  clock_var = clock;

  if (nready <= 0 || pipe1_clock >= clock)
    return 0;

  /* Find any rtl insns that don't generate assembly insns and schedule
     them first. */
  for (i = nready - 1; i >= 0; i--)
    {
      insn = ready[i];
      if (INSN_CODE (insn) == -1
	  || INSN_CODE (insn) == CODE_FOR_blockage
	  || (INSN_P (insn) && get_attr_length (insn) == 0))
	{
	  ready[i] = ready[nready - 1];
	  ready[nready - 1] = insn;
	  return 1;
	}
    }

  pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
  for (i = 0; i < nready; i++)
    if (INSN_CODE (ready[i]) != -1)
      {
	insn = ready[i];
	switch (get_attr_type (insn))
	  {
	  default:
	  case TYPE_MULTI0:
	  case TYPE_CONVERT:
	  case TYPE_FX2:
	  case TYPE_FX3:
	  case TYPE_SPR:
	  case TYPE_NOP:
	  case TYPE_FXB:
	  case TYPE_FPD:
	  case TYPE_FP6:
	  case TYPE_FP7:
	    pipe_0 = i;
	    break;
	  case TYPE_LOAD:
	  case TYPE_STORE:
	    pipe_ls = i;
	  case TYPE_LNOP:
	  case TYPE_SHUF:
	  case TYPE_BR:
	  case TYPE_MULTI1:
	  case TYPE_HBR:
	    pipe_1 = i;
	    break;
	  case TYPE_IPREFETCH:
	    pipe_hbrp = i;
	    break;
	  }
      }

  /* In the first scheduling phase, schedule loads and stores together
     to increase the chance they will get merged during postreload CSE. */
  if (!reload_completed && pipe_ls >= 0)
    {
      insn = ready[pipe_ls];
      ready[pipe_ls] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }

  /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
  if (pipe_hbrp >= 0)
    pipe_1 = pipe_hbrp;

  /* When we have loads/stores in every cycle of the last 15 insns and
     we are about to schedule another load/store, emit an hbrp insn
     instead. */
  if (in_spu_reorg
      && spu_sched_length - spu_ls_first >= 4 * 15
      && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
    {
      insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
      recog_memoized (insn);
      if (pipe0_clock < clock)
	PUT_MODE (insn, TImode);
      spu_sched_variable_issue (file, verbose, insn, -1);
      return 0;
    }

  /* In general, we want to emit nops to increase dual issue, but dual
     issue isn't faster when one of the insns could be scheduled later
     without affecting the critical path.  We look at INSN_PRIORITY to
     make a good guess, but it isn't perfect so -mdual-nops=n can be
     used to tune it. */
  if (in_spu_reorg && spu_dual_nops < 10)
    {
      /* When we are at an even address and we are not issuing nops to
         improve scheduling then we need to advance the cycle. */
      if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
	  && (spu_dual_nops == 0
	      || (pipe_1 != -1
		  && prev_priority >
		  INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
	return 0;

      /* When at an odd address, schedule the highest priority insn
         without considering pipeline. */
      if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
	  && (spu_dual_nops == 0
	      || (prev_priority >
		  INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
	return 1;
    }

  /* We haven't issued a pipe0 insn yet this cycle, if there is a
     pipe0 insn in the ready list, schedule it. */
  if (pipe0_clock < clock && pipe_0 >= 0)
    schedule_i = pipe_0;

  /* Either we've scheduled a pipe0 insn already or there is no pipe0
     insn to schedule.  Put a pipe1 insn at the front of the ready list. */
  else
    schedule_i = pipe_1;

  if (schedule_i > -1)
    {
      insn = ready[schedule_i];
      ready[schedule_i] = ready[nready - 1];
      ready[nready - 1] = insn;
      return 1;
    }
  return 0;
}
/* INSN is dependent on DEP_INSN. */
static int
spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
  rtx set;

  /* The blockage pattern is used to prevent instructions from being
     moved across it and has no cost. */
  if (INSN_CODE (insn) == CODE_FOR_blockage
      || INSN_CODE (dep_insn) == CODE_FOR_blockage)
    return 0;

  if ((INSN_P (insn) && get_attr_length (insn) == 0)
      || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
    return 0;

  /* Make sure hbrps are spread out. */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 8;

  /* Make sure hints and hbrps are 2 cycles apart. */
  if ((INSN_CODE (insn) == CODE_FOR_iprefetch
       || INSN_CODE (insn) == CODE_FOR_hbr)
      && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
	  || INSN_CODE (dep_insn) == CODE_FOR_hbr))
    return 2;

  /* An hbrp has no real dependency on other insns. */
  if (INSN_CODE (insn) == CODE_FOR_iprefetch
      || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
    return 0;

  /* Assuming that it is unlikely an argument register will be used in
     the first cycle of the called function, we reduce the cost for
     slightly better scheduling of dep_insn.  When not hinted, the
     mispredicted branch would hide the cost as well. */
  if (CALL_P (insn))
    {
      rtx target = get_branch_target (insn);
      if (GET_CODE (target) != REG || !set_of (target, insn))
	return cost - 2;
      return cost;
    }

  /* And when returning from a function, let's assume the return values
     are completed sooner too. */
  if (CALL_P (dep_insn))
    return cost - 2;

  /* Make sure an instruction that loads from the back chain is scheduled
     away from the return instruction so a hint is more likely to get
     issued. */
  if (INSN_CODE (insn) == CODE_FOR__return
      && (set = single_set (dep_insn))
      && GET_CODE (SET_DEST (set)) == REG
      && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
    return 20;

  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier. */
  if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;

  return cost;
}
/* Create a CONST_DOUBLE from a string. */
struct rtx_def *
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}

int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}

static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
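
/* For example, 0x12341234 repeats its low halfword in the high
   halfword and can be loaded with a single ilh, while 0x12340000 has
   a zero low halfword and can be loaded with ilhu.  */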
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl. */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
	     || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use. */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
	if (!run)
	  {
	    start = i;
	    if (arr[i] == 3)
	      run = 1;
	    else if (arr[i] == 2 && arr[i + 1] == 3)
	      run = 2;
	    else if (arr[i] == 0)
	      {
		while (arr[i + run] == run && i + run < 16)
		  run++;
		if (run != 4 && run != 8)
		  cpat = 0;
	      }
	    else
	      cpat = 0;
	    if ((i & (run - 1)) != 0)
	      cpat = 0;
	    i += run;
	  }
	else
	  cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
	run = 1;
      if (prun)
	*prun = run;
      if (pstart)
	*pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
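
/* The c*d instructions build a shuffle control word that is the
   identity pattern 0x10 0x11 ... 0x1f except for the bytes that
   select the inserted element (0x03 for cbd, 0x02 0x03 for chd,
   0x00..0x03 for cwd, 0x00..0x07 for cdd) at the insertion offset;
   RUN and START above describe that inserted element.  */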
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT. */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi, repeat;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok. */
  if (!flag_pic
      && mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
    op = CONST_VECTOR_ELT (op, 0);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      /* We can never know if the resulting address fits in 18 bits and can be
         loaded with ila.  For now, assume the address will not overflow if
         the displacement is "small" (fits 'K' constraint). */
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
	{
	  rtx sym = XEXP (XEXP (op, 0), 0);
	  rtx cst = XEXP (XEXP (op, 0), 1);

	  if (GET_CODE (sym) == SYMBOL_REF
	      && GET_CODE (cst) == CONST_INT
	      && satisfies_constraint_K (cst))
	    return IC_IL1s;
	}
      return IC_IL2s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
	  return IC_POOL;
      /* Fall through. */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical. */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
	for (j = 0; j < 4; j++)
	  if (arr[j] != arr[i + j])
	    repeated = 0;

      if (repeated)
	{
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);

	  if (which_immediate_load (val) != SPU_NONE)
	    return IC_IL1;
	}

      /* Any mode of 2 bytes or smaller can be loaded with an il
         instruction. */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      repeat = 0;
      for (i = 0; i < 16 && fsmbi; i++)
	if (arr[i] != 0 && repeat == 0)
	  repeat = arr[i];
	else if (arr[i] != 0 && arr[i] != repeat)
	  fsmbi = 0;
      if (fsmbi)
	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
	return IC_CPAT;

      if (repeated)
	return IC_IL2;

      return IC_POOL;
    default:
      break;
    }
  gcc_unreachable ();
}
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
	return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
	{
	  val = trunc_int_for_mode (val, QImode);
	  if (val >= -0x200 && val <= 0x1ff)
	    return SPU_ORBI;
	}
    }
  return SPU_NONE;
}
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs. */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated. */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
int
arith_immediate_p (rtx op, enum machine_mode mode,
		   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated. */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
/* TRUE when op is an immediate and an exact power of 2, and given that
   OP is 2^scale, scale >= LOW && scale <= HIGH.  When OP is a vector,
   all entries must be the same. */
int
exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
{
  enum machine_mode int_mode;
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated. */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, int_mode);

  /* Currently, we only handle SFmode */
  gcc_assert (mode == SFmode);
  {
    int exp = (val >> 23) - 127;
    return val > 0 && (val & 0x007fffff) == 0
	   && exp >= low && exp <= high;
  }
}
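
/* In IEEE single precision a power of two 2^n has a zero mantissa and
   a biased exponent of n + 127, which is exactly what the val and exp
   tests above check for.  */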
/* Return true if X is a SYMBOL_REF to an __ea qualified variable. */

static int
ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
{
  rtx x = *px;
  tree decl;

  if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
    {
      rtx plus = XEXP (x, 0);
      rtx op0 = XEXP (plus, 0);
      rtx op1 = XEXP (plus, 1);
      if (GET_CODE (op1) == CONST_INT)
	x = op0;
    }

  return (GET_CODE (x) == SYMBOL_REF
	  && (decl = SYMBOL_REF_DECL (x)) != 0
	  && TREE_CODE (decl) == VAR_DECL
	  && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
}

/* We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match. */
int
spu_legitimate_constant_p (rtx x)
{
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);

  /* Reject any __ea qualified reference.  These can't appear in
     instructions but must be forced to the constant pool. */
  if (for_each_rtx (&x, ea_symbol_ref, 0))
    return 0;

  /* V4SI with all identical symbols is valid. */
  if (!flag_pic
      && GET_MODE (x) == V4SImode
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
	   && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
	   && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);

  if (GET_CODE (x) == CONST_VECTOR
      && !const_vector_immediate_p (x))
    return 0;
  return 1;
}
/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const_int, where const_int is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  We only care about
   16 byte modes because the expand phase will change all smaller MEM
   references to TImode. */
static bool
spu_legitimate_address_p (enum machine_mode mode,
			  rtx x, bool reg_ok_strict)
{
  int aligned = GET_MODE_SIZE (mode) >= 16;
  if (aligned
      && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
    x = XEXP (x, 0);
  switch (GET_CODE (x))
    {
    case LABEL_REF:
      return !TARGET_LARGE_MEM;

    case SYMBOL_REF:
    case CONST:
      /* Keep __ea references until reload so that spu_expand_mov can see them
	 in MEMs. */
      if (ea_symbol_ref (&x, 0))
	return !reload_in_progress && !reload_completed;
      return !TARGET_LARGE_MEM;

    case CONST_INT:
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

    case REG:
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

    case PLUS:
    case LO_SUM:
      {
	rtx op0 = XEXP (x, 0);
	rtx op1 = XEXP (x, 1);
	if (GET_CODE (op0) == SUBREG)
	  op0 = XEXP (op0, 0);
	if (GET_CODE (op1) == SUBREG)
	  op1 = XEXP (op1, 0);
	if (GET_CODE (op0) == REG
	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
	    && GET_CODE (op1) == CONST_INT
	    && INTVAL (op1) >= -0x2000
	    && INTVAL (op1) <= 0x1fff
	    && (!aligned || (INTVAL (op1) & 15) == 0))
	  return TRUE;
	if (GET_CODE (op0) == REG
	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
	    && GET_CODE (op1) == REG
	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
	  return TRUE;
      }
      break;

    default:
      break;
    }
  return FALSE;
}
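
/* The d-form loads and stores (lqd/stqd) take a signed 10-bit
   quadword offset, which gives the -0x2000 .. 0x1fff byte range
   checked above; because the offset is counted in quadwords the low 4
   bits of the constant are dropped, hence the 16-byte alignment
   requirement for 16-byte modes.  */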
/* Like spu_legitimate_address_p, except with named addresses. */
static bool
spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
				     bool reg_ok_strict, addr_space_t as)
{
  if (as == ADDR_SPACE_EA)
    return (REG_P (x) && (GET_MODE (x) == EAmode));

  else if (as != ADDR_SPACE_GENERIC)
    gcc_unreachable ();

  return spu_legitimate_address_p (mode, x, reg_ok_strict);
}
/* When the address is reg + const_int, force the const_int into a
   register. */
static rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			enum machine_mode mode ATTRIBUTE_UNUSED)
{
  rtx op0, op1;
  /* Make sure both operands are registers. */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (ALIGNED_SYMBOL_REF_P (op0))
	{
	  op0 = force_reg (Pmode, op0);
	  mark_reg_pointer (op0, 128);
	}
      else if (GET_CODE (op0) != REG)
	op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
	{
	  op1 = force_reg (Pmode, op1);
	  mark_reg_pointer (op1, 128);
	}
      else if (GET_CODE (op1) != REG)
	op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
    }
  return x;
}

/* Like spu_legitimate_address, except with named address support. */
static rtx
spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
				   addr_space_t as)
{
  if (as != ADDR_SPACE_GENERIC)
    return x;

  return spu_legitimize_address (x, oldx, mode);
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler. */
static tree
spu_handle_fndecl_attribute (tree * node,
			     tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (0, "%qE attribute only applies to functions",
	       name);
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle the "vector" attribute. */
static tree
spu_handle_vector_attribute (tree * node, tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  enum machine_mode mode;
  int unsigned_p;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  unsigned_p = TYPE_UNSIGNED (type);
  switch (mode)
    {
    case DImode:
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
      break;
    case SImode:
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
      break;
    case HImode:
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
      break;
    case QImode:
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
      break;
    case SFmode:
      result = V4SF_type_node;
      break;
    case DFmode:
      result = V2DF_type_node;
      break;
    default:
      break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type. */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;		/* No need to hang on to the attribute. */

  if (!result)
    warning (0, "%qE attribute ignored", name);
  else
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);
/* Return nonzero if FUNC is a naked function. */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  if (!current_function_is_leaf || crtl->outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return get_frame_size () + crtl->outgoing_args_size + sp_offset;
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size ();
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + crtl->outgoing_args_size
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;
  else
    gcc_unreachable ();
}
3975 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3977 enum machine_mode mode
= TYPE_MODE (type
);
3978 int byte_size
= ((mode
== BLKmode
)
3979 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3981 /* Make sure small structs are left justified in a register. */
3982 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3983 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3985 enum machine_mode smode
;
3988 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3989 int n
= byte_size
/ UNITS_PER_WORD
;
3990 v
= rtvec_alloc (nregs
);
3991 for (i
= 0; i
< n
; i
++)
3993 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3994 gen_rtx_REG (TImode
,
3997 GEN_INT (UNITS_PER_WORD
* i
));
3998 byte_size
-= UNITS_PER_WORD
;
4006 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
4008 gen_rtx_EXPR_LIST (VOIDmode
,
4009 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
4010 GEN_INT (UNITS_PER_WORD
* n
));
4012 return gen_rtx_PARALLEL (mode
, v
);
4014 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
4018 spu_function_arg (CUMULATIVE_ARGS
*cum
,
4019 enum machine_mode mode
,
4020 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4024 if (cum
>= MAX_REGISTER_ARGS
)
4027 byte_size
= ((mode
== BLKmode
)
4028 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4030 /* The ABI does not allow parameters to be passed partially in
4031 reg and partially in stack. */
4032 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
4035 /* Make sure small structs are left justified in a register. */
4036 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
4037 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
4039 enum machine_mode smode
;
4043 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
4044 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4045 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
4047 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
4050 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
4054 spu_function_arg_advance (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
4055 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4057 *cum
+= (type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
4060 ? ((int_size_in_bytes (type
) + 15) / 16)
4063 : HARD_REGNO_NREGS (cum
, mode
));
4066 /* Variable sized types are passed by reference. */
4068 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
4069 enum machine_mode mode ATTRIBUTE_UNUSED
,
4070 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4072 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
4078 /* Create and return the va_list datatype.
4080 On SPU, va_list is an array type equivalent to
4082 typedef struct __va_list_tag
4084 void *__args __attribute__((__aligned(16)));
4085 void *__skip __attribute__((__aligned(16)));
4089 where __args points to the arg that will be returned by the next
4090 va_arg(), and __skip points to the previous stack frame such that
4091 when __args == __skip we should advance __args by 32 bytes. */
4093 spu_build_builtin_va_list (void)
4095 tree f_args
, f_skip
, record
, type_decl
;
4098 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4101 build_decl (BUILTINS_LOCATION
,
4102 TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4104 f_args
= build_decl (BUILTINS_LOCATION
,
4105 FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
4106 f_skip
= build_decl (BUILTINS_LOCATION
,
4107 FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
4109 DECL_FIELD_CONTEXT (f_args
) = record
;
4110 DECL_ALIGN (f_args
) = 128;
4111 DECL_USER_ALIGN (f_args
) = 1;
4113 DECL_FIELD_CONTEXT (f_skip
) = record
;
4114 DECL_ALIGN (f_skip
) = 128;
4115 DECL_USER_ALIGN (f_skip
) = 1;
4117 TREE_CHAIN (record
) = type_decl
;
4118 TYPE_NAME (record
) = type_decl
;
4119 TYPE_FIELDS (record
) = f_args
;
4120 DECL_CHAIN (f_args
) = f_skip
;
4122 /* We know this is being padded and we want it too. It is an internal
4123 type so hide the warnings from the user. */
4125 warn_padded
= false;
4127 layout_type (record
);
4131 /* The correct type is an array type of one element. */
4132 return build_array_type (record
, build_index_type (size_zero_node
));
4135 /* Implement va_start by filling the va_list structure VALIST.
4136 NEXTARG points to the first anonymous stack argument.
4138 The following global variables are used to initialize
4139 the va_list structure:
4142 the CUMULATIVE_ARGS for this function
4144 crtl->args.arg_offset_rtx:
4145 holds the offset of the first anonymous stack argument
4146 (relative to the virtual arg pointer). */
4149 spu_va_start (tree valist
, rtx nextarg
)
4151 tree f_args
, f_skip
;
4154 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4155 f_skip
= DECL_CHAIN (f_args
);
4157 valist
= build_va_arg_indirect_ref (valist
);
4159 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4161 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4163 /* Find the __args area. */
4164 t
= make_tree (TREE_TYPE (args
), nextarg
);
4165 if (crtl
->args
.pretend_args_size
> 0)
4166 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (args
), t
,
4167 size_int (-STACK_POINTER_OFFSET
));
4168 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
4169 TREE_SIDE_EFFECTS (t
) = 1;
4170 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4172 /* Find the __skip area. */
4173 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4174 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (skip
), t
,
4175 size_int (crtl
->args
.pretend_args_size
4176 - STACK_POINTER_OFFSET
));
4177 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
4178 TREE_SIDE_EFFECTS (t
) = 1;
4179 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4182 /* Gimplify va_arg by updating the va_list structure
4183 VALIST as required to retrieve an argument of type
4184 TYPE, and returning that argument.
4186 ret = va_arg(VALIST, TYPE);
4188 generates code equivalent to:
4190 paddedsize = (sizeof(TYPE) + 15) & -16;
4191 if (VALIST.__args + paddedsize > VALIST.__skip
4192 && VALIST.__args <= VALIST.__skip)
4193 addr = VALIST.__skip + 32;
4195 addr = VALIST.__args;
4196 VALIST.__args = addr + paddedsize;
4197 ret = *(TYPE *)addr;
4200 spu_gimplify_va_arg_expr (tree valist
, tree type
, gimple_seq
* pre_p
,
4201 gimple_seq
* post_p ATTRIBUTE_UNUSED
)
4203 tree f_args
, f_skip
;
4205 HOST_WIDE_INT size
, rsize
;
4206 tree paddedsize
, addr
, tmp
;
4207 bool pass_by_reference_p
;
4209 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4210 f_skip
= DECL_CHAIN (f_args
);
4212 valist
= build_simple_mem_ref (valist
);
4214 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4216 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4218 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4220 /* if an object is dynamically sized, a pointer to it is passed
4221 instead of the object itself. */
4222 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4224 if (pass_by_reference_p
)
4225 type
= build_pointer_type (type
);
4226 size
= int_size_in_bytes (type
);
4227 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4229 /* build conditional expression to calculate addr. The expression
4230 will be gimplified later. */
4231 paddedsize
= size_int (rsize
);
4232 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (args
), paddedsize
);
4233 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4234 build2 (GT_EXPR
, boolean_type_node
, tmp
, unshare_expr (skip
)),
4235 build2 (LE_EXPR
, boolean_type_node
, unshare_expr (args
),
4236 unshare_expr (skip
)));
4238 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4239 build2 (POINTER_PLUS_EXPR
, ptr_type_node
, unshare_expr (skip
),
4240 size_int (32)), unshare_expr (args
));
4242 gimplify_assign (addr
, tmp
, pre_p
);
4244 /* update VALIST.__args */
4245 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
4246 gimplify_assign (unshare_expr (args
), tmp
, pre_p
);
4248 addr
= fold_convert (build_pointer_type_for_mode (type
, ptr_mode
, true),
4251 if (pass_by_reference_p
)
4252 addr
= build_va_arg_indirect_ref (addr
);
4254 return build_va_arg_indirect_ref (addr
);
4257 /* Save parameter registers starting with the register that corresponds
4258 to the first unnamed parameters. If the first unnamed parameter is
4259 in the stack then save no registers. Set pretend_args_size to the
4260 amount of space needed to save the registers. */
4262 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
4263 tree type
, int *pretend_size
, int no_rtl
)
4272 /* cum currently points to the last named argument, we want to
4273 start at the next argument. */
4274 spu_function_arg_advance (&ncum
, mode
, type
, true);
4276 offset
= -STACK_POINTER_OFFSET
;
4277 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4279 tmp
= gen_frame_mem (V4SImode
,
4280 plus_constant (virtual_incoming_args_rtx
,
4282 emit_move_insn (tmp
,
4283 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4286 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
static void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}

/* This is called any time we inspect the alignment of a register for
   addresses. */
static int
reg_aligned_for_addr (rtx x)
{
  int regno =
    REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
  return REGNO_POINTER_ALIGN (regno) >= 128;
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS. */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1. */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
4324 /* Return TRUE if we are certain the mem refers to a complete object
4325 which is both 16-byte aligned and padded to a 16-byte boundary. This
4326 would make it safe to store with a single instruction.
4327 We guarantee the alignment and padding for static objects by aligning
4328 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4329 FIXME: We currently cannot guarantee this for objects on the stack
4330 because assign_parm_setup_stack calls assign_stack_local with the
4331 alignment of the parameter mode and in that case the alignment never
4332 gets adjusted by LOCAL_ALIGNMENT. */
4334 store_with_one_insn_p (rtx mem
)
4336 enum machine_mode mode
= GET_MODE (mem
);
4337 rtx addr
= XEXP (mem
, 0);
4338 if (mode
== BLKmode
)
4340 if (GET_MODE_SIZE (mode
) >= 16)
4342 /* Only static objects. */
4343 if (GET_CODE (addr
) == SYMBOL_REF
)
4345 /* We use the associated declaration to make sure the access is
4346 referring to the whole object.
4347 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
4348 if it is necessary. Will there be cases where one exists, and
4349 the other does not? Will there be cases where both exist, but
4350 have different types? */
4351 tree decl
= MEM_EXPR (mem
);
4353 && TREE_CODE (decl
) == VAR_DECL
4354 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4356 decl
= SYMBOL_REF_DECL (addr
);
4358 && TREE_CODE (decl
) == VAR_DECL
4359 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
/* Return 1 when the address is not valid for a simple load and store as
   required by the '_mov*' patterns.  We could make this less strict
   for loads, but we prefer MEMs to look the same so they are more
   likely to be merged. */
static int
address_needs_split (rtx mem)
{
  if (GET_MODE_SIZE (GET_MODE (mem)) < 16
      && (GET_MODE_SIZE (GET_MODE (mem)) < 4
	  || !(store_with_one_insn_p (mem)
	       || mem_is_padded_component_ref (mem))))
    return 1;

  return 0;
}

static GTY(()) rtx cache_fetch;		  /* __cache_fetch function */
static GTY(()) rtx cache_fetch_dirty;	  /* __cache_fetch_dirty function */
static alias_set_type ea_alias_set = -1;  /* alias set for __ea memory */

/* MEM is known to be an __ea qualified memory access.  Emit a call to
   fetch the ppu memory to local store, and return its address in local
   store. */

static void
ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
{
  if (is_store)
    {
      rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
      if (!cache_fetch_dirty)
	cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
      emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
			       2, ea_addr, EAmode, ndirty, SImode);
    }
  else
    {
      if (!cache_fetch)
	cache_fetch = init_one_libfunc ("__cache_fetch");
      emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
			       1, ea_addr, EAmode);
    }
}
4409 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4410 dirty bit marking, inline.
4412 The cache control data structure is an array of
4414 struct __cache_tag_array
4416 unsigned int tag_lo[4];
4417 unsigned int tag_hi[4];
4418 void *data_pointer[4];
4420 vector unsigned short dirty_bits[4];
4424 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4428 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4429 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4430 rtx index_mask
= gen_reg_rtx (SImode
);
4431 rtx tag_arr
= gen_reg_rtx (Pmode
);
4432 rtx splat_mask
= gen_reg_rtx (TImode
);
4433 rtx splat
= gen_reg_rtx (V4SImode
);
4434 rtx splat_hi
= NULL_RTX
;
4435 rtx tag_index
= gen_reg_rtx (Pmode
);
4436 rtx block_off
= gen_reg_rtx (SImode
);
4437 rtx tag_addr
= gen_reg_rtx (Pmode
);
4438 rtx tag
= gen_reg_rtx (V4SImode
);
4439 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4440 rtx cache_tag_hi
= NULL_RTX
;
4441 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4442 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4443 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4444 rtx tag_equal_hi
= NULL_RTX
;
4445 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4446 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4447 rtx eq_index
= gen_reg_rtx (SImode
);
4448 rtx bcomp
, hit_label
, hit_ref
, cont_label
, insn
;
4450 if (spu_ea_model
!= 32)
4452 splat_hi
= gen_reg_rtx (V4SImode
);
4453 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4454 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4457 emit_move_insn (index_mask
, plus_constant (tag_size_sym
, -128));
4458 emit_move_insn (tag_arr
, tag_arr_sym
);
4459 v
= 0x0001020300010203LL
;
4460 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4461 ea_addr_si
= ea_addr
;
4462 if (spu_ea_model
!= 32)
4463 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4465 /* tag_index = ea_addr & (tag_array_size - 128) */
4466 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4468 /* splat ea_addr to all 4 slots. */
4469 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4470 /* Similarly for high 32 bits of ea_addr. */
4471 if (spu_ea_model
!= 32)
4472 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4474 /* block_off = ea_addr & 127 */
4475 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4477 /* tag_addr = tag_arr + tag_index */
4478 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4480 /* Read cache tags. */
4481 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4482 if (spu_ea_model
!= 32)
4483 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4484 plus_constant (tag_addr
, 16)));
4486 /* tag = ea_addr & -128 */
4487 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4489 /* Read all four cache data pointers. */
4490 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4491 plus_constant (tag_addr
, 32)));
4494 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4495 if (spu_ea_model
!= 32)
4497 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4498 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4501 /* At most one of the tags compare equal, so tag_equal has one
4502 32-bit slot set to all 1's, with the other slots all zero.
4503 gbb picks off low bit from each byte in the 128-bit registers,
4504 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4506 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4507 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4509 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4510 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4512 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4513 (rotating eq_index mod 16 bytes). */
4514 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4515 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4517 /* Add block offset to form final data address. */
4518 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4520 /* Check that we did hit. */
4521 hit_label
= gen_label_rtx ();
4522 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4523 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4524 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4525 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4527 /* Say that this branch is very likely to happen. */
4528 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4529 add_reg_note (insn
, REG_BR_PROB
, GEN_INT (v
));
4531 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4532 cont_label
= gen_label_rtx ();
4533 emit_jump_insn (gen_jump (cont_label
));
4536 emit_label (hit_label
);
4541 rtx dirty_bits
= gen_reg_rtx (TImode
);
4542 rtx dirty_off
= gen_reg_rtx (SImode
);
4543 rtx dirty_128
= gen_reg_rtx (TImode
);
4544 rtx neg_block_off
= gen_reg_rtx (SImode
);
4546 /* Set up mask with one dirty bit per byte of the mem we are
4547 writing, starting from top bit. */
4549 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4550 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4555 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4557 /* Form index into cache dirty_bits. eq_index is one of
4558 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4559 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4560 offset to each of the four dirty_bits elements. */
4561 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4563 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4565 /* Rotate bit mask to proper bit. */
4566 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4567 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4568 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4570 /* Or in the new dirty bits. */
4571 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4574 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4577 emit_label (cont_label
);
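/* Expand a MEM in the __ea address space into an access through the
   software-managed cache: look the effective address up (or fetch it in),
   then refer to the local-store copy via the new MEM returned below.  */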
static rtx
expand_ea_mem (rtx mem, bool is_store)
{
  rtx ea_addr;
  rtx data_addr = gen_reg_rtx (Pmode);
  rtx new_mem;

  ea_addr = force_reg (EAmode, XEXP (mem, 0));
  if (optimize_size || optimize == 0)
    ea_load_store (mem, is_store, ea_addr, data_addr);
  else
    ea_load_store_inline (mem, is_store, ea_addr, data_addr);

  if (ea_alias_set == -1)
    ea_alias_set = new_alias_set ();

  /* We generate a new MEM RTX to refer to the copy of the data
     in the cache.  We do not copy memory attributes (except the
     alignment) from the original MEM, as they may no longer apply
     to the cache copy.  */

  new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
  set_mem_alias_set (new_mem, ea_alias_set);
  set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));

  return new_mem;
}
4608 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
4610 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4613 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4615 rtx from
= SUBREG_REG (ops
[1]);
4616 enum machine_mode imode
= int_mode_for_mode (GET_MODE (from
));
4618 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4619 && GET_MODE_CLASS (imode
) == MODE_INT
4620 && subreg_lowpart_p (ops
[1]));
4622 if (GET_MODE_SIZE (imode
) < 4)
4624 if (imode
!= GET_MODE (from
))
4625 from
= gen_rtx_SUBREG (imode
, from
, 0);
4627 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4629 enum insn_code icode
= convert_optab_handler (trunc_optab
,
4631 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4634 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4638 /* At least one of the operands needs to be a register. */
4639 if ((reload_in_progress
| reload_completed
) == 0
4640 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4642 rtx temp
= force_reg (mode
, ops
[1]);
4643 emit_move_insn (ops
[0], temp
);
4646 if (reload_in_progress
|| reload_completed
)
4648 if (CONSTANT_P (ops
[1]))
4649 return spu_split_immediate (ops
);
4653 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4655 if (GET_CODE (ops
[1]) == CONST_INT
)
4657 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4658 if (val
!= INTVAL (ops
[1]))
4660 emit_move_insn (ops
[0], GEN_INT (val
));
4666 if (MEM_ADDR_SPACE (ops
[0]))
4667 ops
[0] = expand_ea_mem (ops
[0], true);
4668 return spu_split_store (ops
);
4672 if (MEM_ADDR_SPACE (ops
[1]))
4673 ops
[1] = expand_ea_mem (ops
[1], false);
4674 return spu_split_load (ops
);
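/* Copy the preferred-slot value of the TImode register SRC into DST,
   which has a narrower mode; the shift count below (64 or 96 bits)
   selects the DImode or SImode part held in the preferred slot.  */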
static void
spu_convert_move (rtx dst, rtx src)
{
  enum machine_mode mode = GET_MODE (dst);
  enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
  rtx reg;
  gcc_assert (GET_MODE (src) == TImode);
  reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
  emit_insn (gen_rtx_SET (VOIDmode, reg,
             gen_rtx_TRUNCATE (int_mode,
               gen_rtx_LSHIFTRT (TImode, src,
                 GEN_INT (int_mode == DImode ? 64 : 96)))));
  if (int_mode != mode)
    {
      reg = simplify_gen_subreg (mode, reg, int_mode, 0);
      emit_move_insn (dst, reg);
    }
}
4699 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4700 the address from SRC and SRC+16. Return a REG or CONST_INT that
4701 specifies how many bytes to rotate the loaded registers, plus any
4702 extra from EXTRA_ROTQBY. The address and rotate amounts are
4703 normalized to improve merging of loads and rotate computations. */
4705 spu_expand_load (rtx dst0
, rtx dst1
, rtx src
, int extra_rotby
)
4707 rtx addr
= XEXP (src
, 0);
4708 rtx p0
, p1
, rot
, addr0
, addr1
;
4714 if (MEM_ALIGN (src
) >= 128)
4715 /* Address is already aligned; simply perform a TImode load. */ ;
4716 else if (GET_CODE (addr
) == PLUS
)
4719 aligned reg + aligned reg => lqx
4720 aligned reg + unaligned reg => lqx, rotqby
4721 aligned reg + aligned const => lqd
4722 aligned reg + unaligned const => lqd, rotqbyi
4723 unaligned reg + aligned reg => lqx, rotqby
4724 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4725 unaligned reg + aligned const => lqd, rotqby
4726 unaligned reg + unaligned const -> not allowed by legitimate address
4728 p0
= XEXP (addr
, 0);
4729 p1
= XEXP (addr
, 1);
4730 if (!reg_aligned_for_addr (p0
))
4732 if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4734 rot
= gen_reg_rtx (SImode
);
4735 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4737 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4741 && INTVAL (p1
) * BITS_PER_UNIT
4742 < REGNO_POINTER_ALIGN (REGNO (p0
)))
4744 rot
= gen_reg_rtx (SImode
);
4745 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4750 rtx x
= gen_reg_rtx (SImode
);
4751 emit_move_insn (x
, p1
);
4752 if (!spu_arith_operand (p1
, SImode
))
4754 rot
= gen_reg_rtx (SImode
);
4755 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4756 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4764 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4766 rot_amt
= INTVAL (p1
) & 15;
4767 if (INTVAL (p1
) & -16)
4769 p1
= GEN_INT (INTVAL (p1
) & -16);
4770 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4775 else if (REG_P (p1
) && !reg_aligned_for_addr (p1
))
4779 else if (REG_P (addr
))
4781 if (!reg_aligned_for_addr (addr
))
4784 else if (GET_CODE (addr
) == CONST
)
4786 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4787 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4788 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4790 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4792 addr
= gen_rtx_CONST (Pmode
,
4793 gen_rtx_PLUS (Pmode
,
4794 XEXP (XEXP (addr
, 0), 0),
4795 GEN_INT (rot_amt
& -16)));
4797 addr
= XEXP (XEXP (addr
, 0), 0);
4801 rot
= gen_reg_rtx (Pmode
);
4802 emit_move_insn (rot
, addr
);
4805 else if (GET_CODE (addr
) == CONST_INT
)
4807 rot_amt
= INTVAL (addr
);
4808 addr
= GEN_INT (rot_amt
& -16);
4810 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4812 rot
= gen_reg_rtx (Pmode
);
4813 emit_move_insn (rot
, addr
);
4816 rot_amt
+= extra_rotby
;
4822 rtx x
= gen_reg_rtx (SImode
);
4823 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4827 if (!rot
&& rot_amt
)
4828 rot
= GEN_INT (rot_amt
);
4830 addr0
= copy_rtx (addr
);
4831 addr0
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
4832 emit_insn (gen__movti (dst0
, change_address (src
, TImode
, addr0
)));
4836 addr1
= plus_constant (copy_rtx (addr
), 16);
4837 addr1
= gen_rtx_AND (SImode
, addr1
, GEN_INT (-16));
4838 emit_insn (gen__movti (dst1
, change_address (src
, TImode
, addr1
)));
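/* Split a load of a narrow mode from an unaligned or register-relative
   address into a TImode quadword load plus a rotate that brings the
   addressed bytes into the preferred slot.  */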
int
spu_split_load (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx addr, load, rot;
  int rot_amt;

  if (GET_MODE_SIZE (mode) >= 16)
    return 0;

  addr = XEXP (ops[1], 0);
  gcc_assert (GET_CODE (addr) != AND);

  if (!address_needs_split (ops[1]))
    {
      ops[1] = change_address (ops[1], TImode, addr);
      load = gen_reg_rtx (TImode);
      emit_insn (gen__movti (load, ops[1]));
      spu_convert_move (ops[0], load);
      return 1;
    }

  rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;

  load = gen_reg_rtx (TImode);
  rot = spu_expand_load (load, 0, ops[1], rot_amt);

  if (rot)
    emit_insn (gen_rotqby_ti (load, load, rot));

  spu_convert_move (ops[0], load);
  return 1;
}
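/* Stores of less than a quadword are done read-modify-write: load the
   containing quadword, build an insertion mask with cpat (cbd/chd/cwd/cdd),
   shuffle the new value into place, and store the quadword back.  */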
4879 spu_split_store (rtx
* ops
)
4881 enum machine_mode mode
= GET_MODE (ops
[0]);
4883 rtx addr
, p0
, p1
, p1_lo
, smem
;
4887 if (GET_MODE_SIZE (mode
) >= 16)
4890 addr
= XEXP (ops
[0], 0);
4891 gcc_assert (GET_CODE (addr
) != AND
);
4893 if (!address_needs_split (ops
[0]))
4895 reg
= gen_reg_rtx (TImode
);
4896 emit_insn (gen_spu_convert (reg
, ops
[1]));
4897 ops
[0] = change_address (ops
[0], TImode
, addr
);
4898 emit_move_insn (ops
[0], reg
);
4902 if (GET_CODE (addr
) == PLUS
)
4905 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4906 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4907 aligned reg + aligned const => lqd, c?d, shuf, stqx
4908 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4909 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4910 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4911 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4912 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4915 p0
= XEXP (addr
, 0);
4916 p1
= p1_lo
= XEXP (addr
, 1);
4917 if (REG_P (p0
) && GET_CODE (p1
) == CONST_INT
)
4919 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4920 if (reg_aligned_for_addr (p0
))
4922 p1
= GEN_INT (INTVAL (p1
) & -16);
4923 if (p1
== const0_rtx
)
4926 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4930 rtx x
= gen_reg_rtx (SImode
);
4931 emit_move_insn (x
, p1
);
4932 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4936 else if (REG_P (addr
))
4940 p1
= p1_lo
= const0_rtx
;
4945 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4946 p1
= 0; /* aform doesn't use p1 */
4948 if (ALIGNED_SYMBOL_REF_P (addr
))
4950 else if (GET_CODE (addr
) == CONST
4951 && GET_CODE (XEXP (addr
, 0)) == PLUS
4952 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4953 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4955 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4957 addr
= gen_rtx_CONST (Pmode
,
4958 gen_rtx_PLUS (Pmode
,
4959 XEXP (XEXP (addr
, 0), 0),
4960 GEN_INT (v
& -16)));
4962 addr
= XEXP (XEXP (addr
, 0), 0);
4963 p1_lo
= GEN_INT (v
& 15);
4965 else if (GET_CODE (addr
) == CONST_INT
)
4967 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4968 addr
= GEN_INT (INTVAL (addr
) & -16);
4972 p1_lo
= gen_reg_rtx (SImode
);
4973 emit_move_insn (p1_lo
, addr
);
4977 reg
= gen_reg_rtx (TImode
);
4979 scalar
= store_with_one_insn_p (ops
[0]);
4982 /* We could copy the flags from the ops[0] MEM to mem here,
4983 We don't because we want this load to be optimized away if
4984 possible, and copying the flags will prevent that in certain
4985 cases, e.g. consider the volatile flag. */
4987 rtx pat
= gen_reg_rtx (TImode
);
4988 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
4989 set_mem_alias_set (lmem
, 0);
4990 emit_insn (gen_movti (reg
, lmem
));
4992 if (!p0
|| reg_aligned_for_addr (p0
))
4993 p0
= stack_pointer_rtx
;
4997 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
4998 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
5002 if (GET_CODE (ops
[1]) == REG
)
5003 emit_insn (gen_spu_convert (reg
, ops
[1]));
5004 else if (GET_CODE (ops
[1]) == SUBREG
)
5005 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
5010 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
5011 emit_insn (gen_ashlti3
5012 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
5014 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
5015 /* We can't use the previous alias set because the memory has changed
5016 size and can potentially overlap objects of other types. */
5017 set_mem_alias_set (smem
, 0);
5019 emit_insn (gen_movti (smem
, reg
));
/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded.  */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     padded too.  */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded too.  */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}
5057 /* Parse the -mfixed-range= option string. */
5059 fix_range (const char *const_str
)
5062 char *str
, *dash
, *comma
;
5064 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5065 REG2 are either register names or register numbers. The effect
5066 of this option is to mark the registers in the range from REG1 to
5067 REG2 as ``fixed'' so they won't be used by the compiler. */
5069 i
= strlen (const_str
);
5070 str
= (char *) alloca (i
+ 1);
5071 memcpy (str
, const_str
, i
+ 1);
5075 dash
= strchr (str
, '-');
5078 warning (0, "value of -mfixed-range must have form REG1-REG2");
5082 comma
= strchr (dash
+ 1, ',');
5086 first
= decode_reg_name (str
);
5089 warning (0, "unknown register name: %s", str
);
5093 last
= decode_reg_name (dash
+ 1);
5096 warning (0, "unknown register name: %s", dash
+ 1);
5104 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
5108 for (i
= first
; i
<= last
; ++i
)
5109 fixed_regs
[i
] = call_used_regs
[i
] = 1;
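/* For example, "-mfixed-range=80-127" (a hypothetical range, any REG1-REG2
   pair of names or numbers is accepted) marks registers 80 through 127 as
   fixed, keeping the register allocator away from them.  */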
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction.  */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
         of an SImode will always be all 1s, i.e., valid for fsmbi.  */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction.  */
int
cpat_const_p (rtx x, enum machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}
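/* Build the TImode shuffle pattern that a cbd/chd/cwd/cdd instruction
   would produce for the given base, offset and access size, so the
   pattern can be materialized as a constant when the operands allow it.  */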
5148 gen_cpat_const (rtx
* ops
)
5150 unsigned char dst
[16];
5151 int i
, offset
, shift
, isize
;
5152 if (GET_CODE (ops
[3]) != CONST_INT
5153 || GET_CODE (ops
[2]) != CONST_INT
5154 || (GET_CODE (ops
[1]) != CONST_INT
5155 && GET_CODE (ops
[1]) != REG
))
5157 if (GET_CODE (ops
[1]) == REG
5158 && (!REG_POINTER (ops
[1])
5159 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
5162 for (i
= 0; i
< 16; i
++)
5164 isize
= INTVAL (ops
[3]);
5167 else if (isize
== 2)
5171 offset
= (INTVAL (ops
[2]) +
5172 (GET_CODE (ops
[1]) ==
5173 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
5174 for (i
= 0; i
< isize
; i
++)
5175 dst
[offset
+ i
] = i
+ shift
;
5176 return array_to_constant (TImode
, dst
);
5179 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5180 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5181 than 16 bytes, the value is repeated across the rest of the array. */
5183 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
5188 memset (arr
, 0, 16);
5189 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
5190 if (GET_CODE (x
) == CONST_INT
5191 || (GET_CODE (x
) == CONST_DOUBLE
5192 && (mode
== SFmode
|| mode
== DFmode
)))
5194 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
5196 if (GET_CODE (x
) == CONST_DOUBLE
)
5197 val
= const_double_to_hwint (x
);
5200 first
= GET_MODE_SIZE (mode
) - 1;
5201 for (i
= first
; i
>= 0; i
--)
5203 arr
[i
] = val
& 0xff;
5206 /* Splat the constant across the whole array. */
5207 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
5210 j
= (j
== first
) ? 0 : j
+ 1;
5213 else if (GET_CODE (x
) == CONST_DOUBLE
)
5215 val
= CONST_DOUBLE_LOW (x
);
5216 for (i
= 15; i
>= 8; i
--)
5218 arr
[i
] = val
& 0xff;
5221 val
= CONST_DOUBLE_HIGH (x
);
5222 for (i
= 7; i
>= 0; i
--)
5224 arr
[i
] = val
& 0xff;
5228 else if (GET_CODE (x
) == CONST_VECTOR
)
5232 mode
= GET_MODE_INNER (mode
);
5233 units
= CONST_VECTOR_NUNITS (x
);
5234 for (i
= 0; i
< units
; i
++)
5236 elt
= CONST_VECTOR_ELT (x
, i
);
5237 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
5239 if (GET_CODE (elt
) == CONST_DOUBLE
)
5240 val
= const_double_to_hwint (elt
);
5243 first
= GET_MODE_SIZE (mode
) - 1;
5244 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
5246 for (j
= first
; j
>= 0; j
--)
5248 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
5258 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5259 smaller than 16 bytes, use the bytes that would represent that value
5260 in a register, e.g., for QImode return the value of arr[3]. */
5262 array_to_constant (enum machine_mode mode
, const unsigned char arr
[16])
5264 enum machine_mode inner_mode
;
5266 int units
, size
, i
, j
, k
;
5269 if (GET_MODE_CLASS (mode
) == MODE_INT
5270 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
5272 j
= GET_MODE_SIZE (mode
);
5273 i
= j
< 4 ? 4 - j
: 0;
5274 for (val
= 0; i
< j
; i
++)
5275 val
= (val
<< 8) | arr
[i
];
5276 val
= trunc_int_for_mode (val
, mode
);
5277 return GEN_INT (val
);
5283 for (i
= high
= 0; i
< 8; i
++)
5284 high
= (high
<< 8) | arr
[i
];
5285 for (i
= 8, val
= 0; i
< 16; i
++)
5286 val
= (val
<< 8) | arr
[i
];
5287 return immed_double_const (val
, high
, TImode
);
5291 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5292 val
= trunc_int_for_mode (val
, SImode
);
5293 return hwint_to_const_double (SFmode
, val
);
5297 for (i
= 0, val
= 0; i
< 8; i
++)
5298 val
= (val
<< 8) | arr
[i
];
5299 return hwint_to_const_double (DFmode
, val
);
5302 if (!VECTOR_MODE_P (mode
))
5305 units
= GET_MODE_NUNITS (mode
);
5306 size
= GET_MODE_UNIT_SIZE (mode
);
5307 inner_mode
= GET_MODE_INNER (mode
);
5308 v
= rtvec_alloc (units
);
5310 for (k
= i
= 0; i
< units
; ++i
)
5313 for (j
= 0; j
< size
; j
++, k
++)
5314 val
= (val
<< 8) | arr
[k
];
5316 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
5317 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
5319 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
5324 return gen_rtx_CONST_VECTOR (mode
, v
);
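/* Diagnose a symbolic constant that would need a run-time relocation.
   Depending on -mwarn-reloc / -merror-reloc this emits a warning or an
   error, trying to name the offending decl and its source location.  */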
5328 reloc_diagnostic (rtx x
)
5331 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
5334 if (GET_CODE (x
) == SYMBOL_REF
)
5335 decl
= SYMBOL_REF_DECL (x
);
5336 else if (GET_CODE (x
) == CONST
5337 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5338 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
5340 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5341 if (decl
&& !DECL_P (decl
))
5344 /* The decl could be a string constant. */
5345 if (decl
&& DECL_P (decl
))
5348 /* We use last_assemble_variable_decl to get line information. It's
5349 not always going to be right and might not even be close, but will
5350 be right for the more common cases. */
5351 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
5352 loc
= DECL_SOURCE_LOCATION (decl
);
5354 loc
= DECL_SOURCE_LOCATION (last_assemble_variable_decl
);
5356 if (TARGET_WARN_RELOC
)
5358 "creating run-time relocation for %qD", decl
);
5361 "creating run-time relocation for %qD", decl
);
5365 if (TARGET_WARN_RELOC
)
5366 warning_at (input_location
, 0, "creating run-time relocation");
5368 error_at (input_location
, "creating run-time relocation");
5372 /* Hook into assemble_integer so we can generate an error for run-time
5373 relocations. The SPU ABI disallows them. */
5375 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
5377 /* By default run-time relocations aren't supported, but we allow them
5378 in case users support it in their own run-time loader. And we provide
5379 a warning for those users that don't. */
5380 if ((GET_CODE (x
) == SYMBOL_REF
)
5381 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
5382 reloc_diagnostic (x
);
5384 return default_assemble_integer (x
, size
, aligned_p
);
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
5396 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
,
5397 bool speed ATTRIBUTE_UNUSED
)
5399 enum machine_mode mode
= GET_MODE (x
);
5400 int cost
= COSTS_N_INSNS (2);
5402 /* Folding to a CONST_VECTOR will use extra space but there might
5403 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5404 only if it allows us to fold away multiple insns. Changing the cost
5405 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5406 because this cost will only be compared against a single insn.
5407 if (code == CONST_VECTOR)
5408 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5411 /* Use defaults for float operations. Not accurate but good enough. */
5414 *total
= COSTS_N_INSNS (13);
5419 *total
= COSTS_N_INSNS (6);
5425 if (satisfies_constraint_K (x
))
5427 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5428 *total
= COSTS_N_INSNS (1);
5430 *total
= COSTS_N_INSNS (3);
5434 *total
= COSTS_N_INSNS (3);
5439 *total
= COSTS_N_INSNS (0);
5443 *total
= COSTS_N_INSNS (5);
5447 case FLOAT_TRUNCATE
:
5449 case UNSIGNED_FLOAT
:
5452 *total
= COSTS_N_INSNS (7);
5458 *total
= COSTS_N_INSNS (9);
5465 GET_CODE (XEXP (x
, 0)) ==
5466 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5467 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5469 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5471 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5472 cost
= COSTS_N_INSNS (14);
5473 if ((val
& 0xffff) == 0)
5474 cost
= COSTS_N_INSNS (9);
5475 else if (val
> 0 && val
< 0x10000)
5476 cost
= COSTS_N_INSNS (11);
5485 *total
= COSTS_N_INSNS (20);
5492 *total
= COSTS_N_INSNS (4);
5495 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5496 *total
= COSTS_N_INSNS (0);
5498 *total
= COSTS_N_INSNS (4);
5501 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5502 if (GET_MODE_CLASS (mode
) == MODE_INT
5503 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5504 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5505 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5510 static enum machine_mode
5511 spu_unwind_word_mode (void)
/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
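/* Sibcalls are rejected for -mlarge-mem (and for indirect calls, where
   DECL is NULL), presumably because the tail branch cannot be relied on
   to reach an arbitrary 32-bit address.  */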
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.)  */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again. */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
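/* The 0x00010203 shuffle pattern above splats the SImode allocation size
   into every word slot, so the single subv4si3 decrements both the stack
   pointer (slot 0) and the Available Stack Size (slot 1) at once.  */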
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want. */
rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
          && ((type) == 0
              || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
              || int_size_in_bytes (type) >
              (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
5644 /* Create the built-in types and functions */
5646 enum spu_function_code
5648 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5649 #include "spu-builtins.def"
5654 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5656 struct spu_builtin_description spu_builtins
[] = {
5657 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5658 {fcode, icode, name, type, params},
5659 #include "spu-builtins.def"
5663 static GTY(()) tree spu_builtin_decls
[NUM_SPU_BUILTINS
];
5665 /* Returns the spu builtin decl for CODE. */
5668 spu_builtin_decl (unsigned code
, bool initialize_p ATTRIBUTE_UNUSED
)
5670 if (code
>= NUM_SPU_BUILTINS
)
5671 return error_mark_node
;
5673 return spu_builtin_decls
[code
];
5678 spu_init_builtins (void)
5680 struct spu_builtin_description
*d
;
5683 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5684 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5685 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5686 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5687 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5688 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5690 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5691 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5692 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5693 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5695 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5697 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5698 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5699 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5700 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5701 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5702 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5703 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5704 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5705 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5706 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5707 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5708 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5710 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5711 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5712 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5713 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5714 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5715 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5716 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5717 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5719 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5720 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5722 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5724 spu_builtin_types
[SPU_BTI_PTR
] =
5725 build_pointer_type (build_qualified_type
5727 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5729 /* For each builtin we build a new prototype. The tree code will make
5730 sure nodes are shared. */
5731 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5734 char name
[64]; /* build_function will make a copy. */
5740 /* Find last parm. */
5741 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5746 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5748 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5750 sprintf (name
, "__builtin_%s", d
->name
);
5751 spu_builtin_decls
[i
] =
5752 add_builtin_function (name
, p
, i
, BUILT_IN_MD
, NULL
, NULL_TREE
);
5753 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5754 TREE_READONLY (spu_builtin_decls
[i
]) = 1;
5756 /* These builtins don't throw. */
5757 TREE_NOTHROW (spu_builtin_decls
[i
]) = 1;
void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}
int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
}
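/* Channels 21 through 27 appear to be the MFC command and tag-status
   channels, the ones the -msafe-dma workaround is concerned with.  */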
5794 spu_builtin_splats (rtx ops
[])
5796 enum machine_mode mode
= GET_MODE (ops
[0]);
5797 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5799 unsigned char arr
[16];
5800 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5801 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5805 rtx reg
= gen_reg_rtx (TImode
);
5807 if (GET_CODE (ops
[1]) != REG
5808 && GET_CODE (ops
[1]) != SUBREG
)
5809 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5815 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5821 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5826 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5831 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5837 emit_move_insn (reg
, shuf
);
5838 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
5843 spu_builtin_extract (rtx ops
[])
5845 enum machine_mode mode
;
5848 mode
= GET_MODE (ops
[1]);
5850 if (GET_CODE (ops
[2]) == CONST_INT
)
5855 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5858 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5861 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5864 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5867 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5870 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5878 from
= spu_gen_subreg (TImode
, ops
[1]);
5879 rot
= gen_reg_rtx (TImode
);
5880 tmp
= gen_reg_rtx (SImode
);
5885 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5888 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5889 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5893 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5897 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5902 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5904 emit_insn (gen_spu_convert (ops
[0], rot
));
5908 spu_builtin_insert (rtx ops
[])
5910 enum machine_mode mode
= GET_MODE (ops
[0]);
5911 enum machine_mode imode
= GET_MODE_INNER (mode
);
5912 rtx mask
= gen_reg_rtx (TImode
);
5915 if (GET_CODE (ops
[3]) == CONST_INT
)
5916 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5919 offset
= gen_reg_rtx (SImode
);
5920 emit_insn (gen_mulsi3
5921 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5924 (mask
, stack_pointer_rtx
, offset
,
5925 GEN_INT (GET_MODE_SIZE (imode
))));
5926 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5930 spu_builtin_promote (rtx ops
[])
5932 enum machine_mode mode
, imode
;
5933 rtx rot
, from
, offset
;
5936 mode
= GET_MODE (ops
[0]);
5937 imode
= GET_MODE_INNER (mode
);
5939 from
= gen_reg_rtx (TImode
);
5940 rot
= spu_gen_subreg (TImode
, ops
[0]);
5942 emit_insn (gen_spu_convert (from
, ops
[1]));
5944 if (GET_CODE (ops
[2]) == CONST_INT
)
5946 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5947 if (GET_MODE_SIZE (imode
) < 4)
5948 pos
+= 4 - GET_MODE_SIZE (imode
);
5949 offset
= GEN_INT (pos
& 15);
5953 offset
= gen_reg_rtx (SImode
);
5957 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5960 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5961 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5965 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5966 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5970 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5976 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5980 spu_trampoline_init (rtx m_tramp
, tree fndecl
, rtx cxt
)
5982 rtx fnaddr
= XEXP (DECL_RTL (fndecl
), 0);
5983 rtx shuf
= gen_reg_rtx (V4SImode
);
5984 rtx insn
= gen_reg_rtx (V4SImode
);
5989 fnaddr
= force_reg (SImode
, fnaddr
);
5990 cxt
= force_reg (SImode
, cxt
);
5992 if (TARGET_LARGE_MEM
)
5994 rtx rotl
= gen_reg_rtx (V4SImode
);
5995 rtx mask
= gen_reg_rtx (V4SImode
);
5996 rtx bi
= gen_reg_rtx (SImode
);
5997 static unsigned char const shufa
[16] = {
5998 2, 3, 0, 1, 18, 19, 16, 17,
5999 0, 1, 2, 3, 16, 17, 18, 19
6001 static unsigned char const insna
[16] = {
6003 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
6005 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6008 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
6009 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
6011 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
6012 emit_insn (gen_vrotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
6013 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
6014 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
6016 mem
= adjust_address (m_tramp
, V4SImode
, 0);
6017 emit_move_insn (mem
, insn
);
6019 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
6020 mem
= adjust_address (m_tramp
, Pmode
, 16);
6021 emit_move_insn (mem
, bi
);
6025 rtx scxt
= gen_reg_rtx (SImode
);
6026 rtx sfnaddr
= gen_reg_rtx (SImode
);
6027 static unsigned char const insna
[16] = {
6028 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
6034 shufc
= gen_reg_rtx (TImode
);
6035 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
6037 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6038 fits 18 bits and the last 4 are zeros. This will be true if
6039 the stack pointer is initialized to 0x3fff0 at program start,
6040 otherwise the ila instruction will be garbage. */
6042 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
6043 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
6045 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
6046 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
6047 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
6049 mem
= adjust_address (m_tramp
, V4SImode
, 0);
6050 emit_move_insn (mem
, insn
);
6052 emit_insn (gen_sync ());
6056 spu_expand_sign_extend (rtx ops
[])
6058 unsigned char arr
[16];
6059 rtx pat
= gen_reg_rtx (TImode
);
6062 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
6063 if (GET_MODE (ops
[1]) == QImode
)
6065 sign
= gen_reg_rtx (HImode
);
6066 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
6067 for (i
= 0; i
< 16; i
++)
6073 for (i
= 0; i
< 16; i
++)
6075 switch (GET_MODE (ops
[1]))
6078 sign
= gen_reg_rtx (SImode
);
6079 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
6081 arr
[last
- 1] = 0x02;
6084 sign
= gen_reg_rtx (SImode
);
6085 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
6086 for (i
= 0; i
< 4; i
++)
6087 arr
[last
- i
] = 3 - i
;
6090 sign
= gen_reg_rtx (SImode
);
6091 c
= gen_reg_rtx (SImode
);
6092 emit_insn (gen_spu_convert (c
, ops
[1]));
6093 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
6094 for (i
= 0; i
< 8; i
++)
6095 arr
[last
- i
] = 7 - i
;
6101 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
6102 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
6105 /* expand vector initialization. If there are any constant parts,
6106 load constant parts first. Then load any non-constant parts. */
6108 spu_expand_vector_init (rtx target
, rtx vals
)
6110 enum machine_mode mode
= GET_MODE (target
);
6111 int n_elts
= GET_MODE_NUNITS (mode
);
6113 bool all_same
= true;
6114 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
6117 first
= XVECEXP (vals
, 0, 0);
6118 for (i
= 0; i
< n_elts
; ++i
)
6120 x
= XVECEXP (vals
, 0, i
);
6121 if (!(CONST_INT_P (x
)
6122 || GET_CODE (x
) == CONST_DOUBLE
6123 || GET_CODE (x
) == CONST_FIXED
))
6127 if (first_constant
== NULL_RTX
)
6130 if (i
> 0 && !rtx_equal_p (x
, first
))
6134 /* if all elements are the same, use splats to repeat elements */
6137 if (!CONSTANT_P (first
)
6138 && !register_operand (first
, GET_MODE (x
)))
6139 first
= force_reg (GET_MODE (first
), first
);
6140 emit_insn (gen_spu_splats (target
, first
));
6144 /* load constant parts */
6145 if (n_var
!= n_elts
)
6149 emit_move_insn (target
,
6150 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
6154 rtx constant_parts_rtx
= copy_rtx (vals
);
6156 gcc_assert (first_constant
!= NULL_RTX
);
6157 /* fill empty slots with the first constant, this increases
6158 our chance of using splats in the recursive call below. */
6159 for (i
= 0; i
< n_elts
; ++i
)
6161 x
= XVECEXP (constant_parts_rtx
, 0, i
);
6162 if (!(CONST_INT_P (x
)
6163 || GET_CODE (x
) == CONST_DOUBLE
6164 || GET_CODE (x
) == CONST_FIXED
))
6165 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
6168 spu_expand_vector_init (target
, constant_parts_rtx
);
6172 /* load variable parts */
6175 rtx insert_operands
[4];
6177 insert_operands
[0] = target
;
6178 insert_operands
[2] = target
;
6179 for (i
= 0; i
< n_elts
; ++i
)
6181 x
= XVECEXP (vals
, 0, i
);
6182 if (!(CONST_INT_P (x
)
6183 || GET_CODE (x
) == CONST_DOUBLE
6184 || GET_CODE (x
) == CONST_FIXED
))
6186 if (!register_operand (x
, GET_MODE (x
)))
6187 x
= force_reg (GET_MODE (x
), x
);
6188 insert_operands
[1] = x
;
6189 insert_operands
[3] = GEN_INT (i
);
6190 spu_builtin_insert (insert_operands
);
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
                  enum machine_mode dest_mode,
                  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
        return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
        return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
        return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
        return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
        return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
6245 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6246 DMODE is expected destination mode. This is a recursive function. */
6249 spu_emit_vector_compare (enum rtx_code rcode
,
6251 enum machine_mode dmode
)
6255 enum machine_mode dest_mode
;
6256 enum machine_mode op_mode
= GET_MODE (op1
);
6258 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
6260 /* Floating point vector compare instructions uses destination V4SImode.
6261 Double floating point vector compare instructions uses destination V2DImode.
6262 Move destination to appropriate mode later. */
6263 if (dmode
== V4SFmode
)
6264 dest_mode
= V4SImode
;
6265 else if (dmode
== V2DFmode
)
6266 dest_mode
= V2DImode
;
6270 mask
= gen_reg_rtx (dest_mode
);
6271 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6273 if (vec_cmp_insn
== -1)
6275 bool swap_operands
= false;
6276 bool try_again
= false;
6281 swap_operands
= true;
6286 swap_operands
= true;
6290 /* Treat A != B as ~(A==B). */
6292 enum insn_code nor_code
;
6293 rtx eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6294 nor_code
= optab_handler (one_cmpl_optab
, dest_mode
);
6295 gcc_assert (nor_code
!= CODE_FOR_nothing
);
6296 emit_insn (GEN_FCN (nor_code
) (mask
, eq_rtx
));
6297 if (dmode
!= dest_mode
)
6299 rtx temp
= gen_reg_rtx (dest_mode
);
6300 convert_move (temp
, mask
, 0);
6310 /* Try GT/GTU/LT/LTU OR EQ */
6313 enum insn_code ior_code
;
6314 enum rtx_code new_code
;
6318 case GE
: new_code
= GT
; break;
6319 case GEU
: new_code
= GTU
; break;
6320 case LE
: new_code
= LT
; break;
6321 case LEU
: new_code
= LTU
; break;
6326 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
6327 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6329 ior_code
= optab_handler (ior_optab
, dest_mode
);
6330 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6331 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
6332 if (dmode
!= dest_mode
)
6334 rtx temp
= gen_reg_rtx (dest_mode
);
6335 convert_move (temp
, mask
, 0);
6345 /* You only get two chances. */
6347 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6349 gcc_assert (vec_cmp_insn
!= -1);
6360 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
6361 if (dmode
!= dest_mode
)
6363 rtx temp
= gen_reg_rtx (dest_mode
);
6364 convert_move (temp
, mask
, 0);
/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */
int
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
                           rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return 1;
}
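/* selb takes bits from its second source where the mask is 1 and from the
   first where it is 0, so op2 is passed before op1 to get the usual
   "cond ? op1 : op2" semantics from the comparison mask.  */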
6392 spu_force_reg (enum machine_mode mode
, rtx op
)
6395 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
6397 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
6398 || GET_MODE (op
) == BLKmode
)
6399 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
6403 r
= force_reg (GET_MODE (op
), op
);
6404 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6406 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6411 x
= gen_reg_rtx (mode
);
6412 emit_insn (gen_spu_convert (x
, r
));
6417 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6419 HOST_WIDE_INT v
= 0;
6421 /* Check the range of immediate operands. */
6422 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
6424 int range
= p
- SPU_BTI_7
;
6426 if (!CONSTANT_P (op
))
6427 error ("%s expects an integer literal in the range [%d, %d].",
6429 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
6431 if (GET_CODE (op
) == CONST
6432 && (GET_CODE (XEXP (op
, 0)) == PLUS
6433 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6435 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6436 op
= XEXP (XEXP (op
, 0), 0);
6438 else if (GET_CODE (op
) == CONST_INT
)
6440 else if (GET_CODE (op
) == CONST_VECTOR
6441 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6442 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6444 /* The default for v is 0 which is valid in every range. */
6445 if (v
< spu_builtin_range
[range
].low
6446 || v
> spu_builtin_range
[range
].high
)
6447 error ("%s expects an integer literal in the range [%d, %d]. ("
6448 HOST_WIDE_INT_PRINT_DEC
")",
6450 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6459 /* This is only used in lqa, and stqa. Even though the insns
6460 encode 16 bits of the address (all but the 2 least
6461 significant), only 14 bits are used because it is masked to
6462 be 16 byte aligned. */
6466 /* This is used for lqr and stqr. */
6473 if (GET_CODE (op
) == LABEL_REF
6474 || (GET_CODE (op
) == SYMBOL_REF
6475 && SYMBOL_REF_FUNCTION_P (op
))
6476 || (v
& ((1 << lsbits
) - 1)) != 0)
6477 warning (0, "%d least significant bits of %s are ignored.", lsbits
,
6484 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6485 rtx target
, rtx ops
[])
6487 enum insn_code icode
= (enum insn_code
) d
->icode
;
6490 /* Expand the arguments into rtl. */
6492 if (d
->parm
[0] != SPU_BTI_VOID
)
6495 for (a
= 0; d
->parm
[a
+1] != SPU_BTI_END_OF_PARAMS
; i
++, a
++)
6497 tree arg
= CALL_EXPR_ARG (exp
, a
);
6500 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, EXPAND_NORMAL
);
6503 /* The insn pattern may have additional operands (SCRATCH).
6504 Return the number of actual non-SCRATCH operands. */
6505 gcc_assert (i
<= insn_data
[icode
].n_operands
);
6510 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6511 tree exp
, rtx target
)
6515 enum insn_code icode
= (enum insn_code
) d
->icode
;
6516 enum machine_mode mode
, tmode
;
6521 /* Set up ops[] with values from arglist. */
6522 n_operands
= expand_builtin_args (d
, exp
, target
, ops
);
6524 /* Handle the target operand which must be operand 0. */
6526 if (d
->parm
[0] != SPU_BTI_VOID
)
6529 /* We prefer the mode specified for the match_operand otherwise
6530 use the mode from the builtin function prototype. */
6531 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6532 if (tmode
== VOIDmode
)
6533 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6535 /* Try to use target because not using it can lead to extra copies
6536 and when we are using all of the registers extra copies leads
6538 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6541 target
= ops
[0] = gen_reg_rtx (tmode
);
6543 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6549 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6551 enum machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6556 arg
= CALL_EXPR_ARG (exp
, 0);
6557 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg
)));
6558 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6559 addr
= memory_address (mode
, op
);
6562 op
= gen_reg_rtx (GET_MODE (addr
));
6563 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6564 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6565 op
= gen_rtx_MEM (mode
, op
);
6567 pat
= GEN_FCN (icode
) (target
, op
);
6574 /* Ignore align_hint, but still expand it's args in case they have
6576 if (icode
== CODE_FOR_spu_align_hint
)
6579 /* Handle the rest of the operands. */
6580 for (p
= 1; i
< n_operands
; i
++, p
++)
6582 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6583 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6585 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6587 /* mode can be VOIDmode here for labels */
6589 /* For specific intrinsics with an immediate operand, e.g.,
6590 si_ai(), we sometimes need to convert the scalar argument to a
6591 vector argument by splatting the scalar. */
6592 if (VECTOR_MODE_P (mode
)
6593 && (GET_CODE (ops
[i
]) == CONST_INT
6594 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6595 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6597 if (GET_CODE (ops
[i
]) == CONST_INT
)
6598 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6601 rtx reg
= gen_reg_rtx (mode
);
6602 enum machine_mode imode
= GET_MODE_INNER (mode
);
6603 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6604 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6605 if (imode
!= GET_MODE (ops
[i
]))
6606 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6607 TYPE_UNSIGNED (spu_builtin_types
6609 emit_insn (gen_spu_splats (reg
, ops
[i
]));
6614 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
6616 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6617 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
6623 pat
= GEN_FCN (icode
) (0);
6626 pat
= GEN_FCN (icode
) (ops
[0]);
6629 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6632 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6635 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6638 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6641 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
6650 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6651 emit_call_insn (pat
);
6652 else if (d
->type
== B_JUMP
)
6654 emit_jump_insn (pat
);
6660 return_type
= spu_builtin_types
[d
->parm
[0]];
6661 if (d
->parm
[0] != SPU_BTI_VOID
6662 && GET_MODE (target
) != TYPE_MODE (return_type
))
6664 /* target is the return value. It should always be the mode of
6665 the builtin function prototype. */
6666 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
6673 spu_expand_builtin (tree exp
,
6675 rtx subtarget ATTRIBUTE_UNUSED
,
6676 enum machine_mode mode ATTRIBUTE_UNUSED
,
6677 int ignore ATTRIBUTE_UNUSED
)
6679 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6680 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
);
6681 struct spu_builtin_description
*d
;
6683 if (fcode
< NUM_SPU_BUILTINS
)
6685 d
= &spu_builtins
[fcode
];
6687 return spu_expand_builtin_1 (d
, exp
, target
);
6692 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6694 spu_builtin_mul_widen_even (tree type
)
6696 switch (TYPE_MODE (type
))
6699 if (TYPE_UNSIGNED (type
))
6700 return spu_builtin_decls
[SPU_MULE_0
];
6702 return spu_builtin_decls
[SPU_MULE_1
];
6709 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6711 spu_builtin_mul_widen_odd (tree type
)
6713 switch (TYPE_MODE (type
))
6716 if (TYPE_UNSIGNED (type
))
6717 return spu_builtin_decls
[SPU_MULO_1
];
6719 return spu_builtin_decls
[SPU_MULO_0
];
6726 /* Implement targetm.vectorize.builtin_mask_for_load. */
6728 spu_builtin_mask_for_load (void)
6730 return spu_builtin_decls
[SPU_MASK_FOR_LOAD
];
6733 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6735 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost
,
6736 tree vectype ATTRIBUTE_UNUSED
,
6737 int misalign ATTRIBUTE_UNUSED
)
6739 switch (type_of_cost
)
6747 case cond_branch_not_taken
:
6755 /* Load + rotate. */
6758 case unaligned_load
:
6761 case cond_branch_taken
:
6769 /* Return true iff, data reference of TYPE can reach vector alignment (16)
6770 after applying N number of iterations. This routine does not determine
6771 how may iterations are required to reach desired alignment. */
6774 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6779 /* All other types are naturally aligned. */
6783 /* Implement targetm.vectorize.builtin_vec_perm. */
6785 spu_builtin_vec_perm (tree type
, tree
*mask_element_type
)
6787 *mask_element_type
= unsigned_char_type_node
;
6789 switch (TYPE_MODE (type
))
6792 if (TYPE_UNSIGNED (type
))
6793 return spu_builtin_decls
[SPU_SHUFFLE_0
];
6795 return spu_builtin_decls
[SPU_SHUFFLE_1
];
6798 if (TYPE_UNSIGNED (type
))
6799 return spu_builtin_decls
[SPU_SHUFFLE_2
];
6801 return spu_builtin_decls
[SPU_SHUFFLE_3
];
6804 if (TYPE_UNSIGNED (type
))
6805 return spu_builtin_decls
[SPU_SHUFFLE_4
];
6807 return spu_builtin_decls
[SPU_SHUFFLE_5
];
6810 if (TYPE_UNSIGNED (type
))
6811 return spu_builtin_decls
[SPU_SHUFFLE_6
];
6813 return spu_builtin_decls
[SPU_SHUFFLE_7
];
6816 return spu_builtin_decls
[SPU_SHUFFLE_8
];
6819 return spu_builtin_decls
[SPU_SHUFFLE_9
];
6826 /* Return the appropriate mode for a named address pointer. */
6827 static enum machine_mode
6828 spu_addr_space_pointer_mode (addr_space_t addrspace
)
6832 case ADDR_SPACE_GENERIC
:
6841 /* Return the appropriate mode for a named address address. */
6842 static enum machine_mode
6843 spu_addr_space_address_mode (addr_space_t addrspace
)
6847 case ADDR_SPACE_GENERIC
:
6856 /* Determine if one named address space is a subset of another. */
6859 spu_addr_space_subset_p (addr_space_t subset
, addr_space_t superset
)
6861 gcc_assert (subset
== ADDR_SPACE_GENERIC
|| subset
== ADDR_SPACE_EA
);
6862 gcc_assert (superset
== ADDR_SPACE_GENERIC
|| superset
== ADDR_SPACE_EA
);
6864 if (subset
== superset
)
6867 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6868 being subsets but instead as disjoint address spaces. */
6869 else if (!TARGET_ADDRESS_SPACE_CONVERSION
)
6873 return (subset
== ADDR_SPACE_GENERIC
&& superset
== ADDR_SPACE_EA
);
/* Convert from one address space to another.  */
static rtx
spu_addr_space_convert (rtx op, tree from_type, tree to_type)
{
  addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
  addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));

  gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
  gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);

  if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (Pmode);
      ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
      op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, Pmode, 1);

      emit_insn (gen_subsi3 (result, op, ls));

      return result;
    }

  else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
    {
      rtx result, ls;

      ls = gen_const_mem (DImode,
                          gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
      set_mem_align (ls, 128);

      result = gen_reg_rtx (EAmode);
      ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
      op = force_reg (Pmode, op);
      ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
                                  ls, const0_rtx, EAmode, 1);
      op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));

      if (EAmode == SImode)
        emit_insn (gen_addsi3 (result, op, ls));
      else
        emit_insn (gen_adddi3 (result, op, ls));

      return result;
    }

  else
    return op;
}

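/* Illustrative sketch, added here for clarity and not part of the original
   sources: the RTL emitted above implements, roughly, the following pointer
   arithmetic, where __ea_local_store holds the 64-bit effective address at
   which this SPU's local store is mapped, and a null pointer is preserved
   in both directions by the conditional moves:

       generic = ea      ? (unsigned int) (ea - __ea_local_store) : 0;
       ea      = generic ? (ea_addr_t) generic + __ea_local_store : 0;

   Here ea_addr_t stands for the __ea pointer width, 32 or 64 bits depending
   on -mea32/-mea64 (the EAmode == SImode test at the end).  */
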
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1;
   a value of -2 means the instruction can go in either pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
        fprintf (dump_file, "i%d %s %d %d\n",
                 INSN_UID (insn),
                 insn_data[INSN_CODE (insn)].name,
                 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}

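/* Worked example, added here for clarity and not part of the original
   sources: with t = { 3, 0, 2, 1 } -- 3 instructions that can use either
   pipe, 2 that need pipe0 and 1 that needs pipe1 -- the bound is

       MAX ((3 + 2 + 1 + 1) / 2, MAX (2, 1)) = MAX (3, 2) = 3,

   i.e. at least 3 cycles per iteration are needed no matter how the
   dual-pipe candidates are distributed between the two pipes.  */
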
void
spu_init_expanders (void)
{
  if (cfun)
    {
      rtx r0, r1;

      /* HARD_FRAME_POINTER_REGNUM is only 128 bit aligned when
         frame_pointer_needed is true.  We don't know that until we're
         expanding the prologue.  */
      REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;

      /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
         LAST_VIRTUAL_REGISTER+2 to test the back-end.  We want them
         to be treated as aligned, so generate them here.  */
      r0 = gen_reg_rtx (SImode);
      r1 = gen_reg_rtx (SImode);
      mark_reg_pointer (r0, 128);
      mark_reg_pointer (r1, 128);
      gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
                  && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
    }
}

static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* On SPU, word mode is TImode, so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* On SPU, word mode is TImode, so it is better to use SImode
     for shift counts.  */
  return SImode;
}

/* An early place to adjust some flags after GCC has finished processing
   them.  */
static void
asm_file_start (void)
{
  /* Variable tracking should be run after all optimizations which
     change order of insns.  It also needs a valid CFG.  */
  spu_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  default_file_start ();
}

/* Implement targetm.section_type_flags.  */
static unsigned int
spu_section_type_flags (tree decl, const char *name, int reloc)
{
  /* .toe needs to have type @nobits.  */
  if (strcmp (name, ".toe") == 0)
    return SECTION_BSS;
  /* Don't load _ea into the current address space.  */
  if (strcmp (name, "._ea") == 0)
    return SECTION_WRITE | SECTION_DEBUG;
  return default_section_type_flags (decl, name, reloc);
}

/* Implement targetm.select_section.  */
static section *
spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
{
  /* Variables and constants defined in the __ea address space
     go into a special section named "._ea".  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
    {
      /* We might get called with string constants, but get_named_section
         doesn't like them as they are not DECLs.  Also, we need to set
         flags in that case.  */
      if (!DECL_P (decl))
        return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);

      return get_named_section (decl, "._ea", reloc);
    }

  return default_elf_select_section (decl, reloc, align);
}

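/* Illustrative sketch, added here for clarity and not part of the original
   sources: a definition such as

       __ea int counter = 0;

   is sent to the "._ea" section by the hook above; per the flags chosen in
   spu_section_type_flags, that section only describes the object's
   effective-address image and is not loaded into the SPU local store.  */
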
/* Implement targetm.unique_section.  */
static void
spu_unique_section (tree decl, int reloc)
{
  /* We don't support unique section names in the __ea address
     space for now.  */
  if (TREE_TYPE (decl) != error_mark_node
      && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
    return;

  default_unique_section (decl, reloc);
}

/* Generate a constant or register which contains 2^SCALE.  We assume
   the result is valid for MODE.  Currently, MODE must be V4SFmode and
   SCALE must be SImode.  */
rtx
spu_gen_exp2 (enum machine_mode mode, rtx scale)
{
  gcc_assert (mode == V4SFmode);
  gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
  if (GET_CODE (scale) != CONST_INT)
    {
      /* unsigned int exp = (127 + scale) << 23;
         __vector float m = (__vector float) spu_splats (exp);  */
      rtx reg = force_reg (SImode, scale);
      rtx exp = gen_reg_rtx (SImode);
      rtx mul = gen_reg_rtx (mode);
      emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
      emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
      emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
      return mul;
    }
  else
    {
      HOST_WIDE_INT exp = 127 + INTVAL (scale);
      unsigned char arr[16];

      arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
      arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
      arr[2] = arr[6] = arr[10] = arr[14] = 0;
      arr[3] = arr[7] = arr[11] = arr[15] = 0;
      return array_to_constant (mode, arr);
    }
}

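/* Worked example, added here for clarity and not part of the original
   sources: for a constant SCALE of 3 the else branch above computes

       exp    = 127 + 3 = 130 = 0x82
       arr[0] = 130 >> 1          = 0x41
       arr[1] = (130 << 7) & 0xff = 0x00

   so every 32-bit word of the V4SF constant is 0x41000000, the IEEE
   single-precision encoding of 8.0f == 2^3.  */
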
/* After reload, just change the convert into a move instruction
   or a dead instruction.  */
void
spu_split_convert (rtx ops[])
{
  if (REGNO (ops[0]) == REGNO (ops[1]))
    emit_note (NOTE_INSN_DELETED);
  else
    {
      /* Use TImode always as this might help hard reg copyprop.  */
      rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
      rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
      emit_insn (gen_move_insn (op0, op1));
    }
}

void
spu_function_profiler (FILE * file, int labelno)
{
  fprintf (file, "# profile\n");
  fprintf (file, "brsl $75, _mcount\n");