1 /* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
53 #include "tree-gimple.h"
54 #include "tm-constrs.h"
60 /* Builtin types, data and prototypes. */
62 enum spu_builtin_type_index
64 SPU_BTI_END_OF_PARAMS
,
66 /* We create new type nodes for these. */
78 /* A 16-byte type. (Implemented with V16QI_type_node) */
81 /* These all correspond to intSI_type_node */
95 /* These correspond to the standard types */
115 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
116 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
117 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
118 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
119 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
120 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
121 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
122 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
123 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
124 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
126 static GTY(()) tree spu_builtin_types
[SPU_BTI_MAX
];
128 struct spu_builtin_range
133 struct spu_address_space
136 rtx (*to_generic_insn
) (rtx
, rtx
);
137 rtx (*from_generic_insn
) (rtx
, rtx
);
/* Table of the address spaces handled in this file.  Each initializer is a
   string followed by two insn-generator pointers.  Going by the member
   order visible in struct spu_address_space, the two pointers are
   to_generic_insn and then from_generic_insn.
   NOTE(review): the string member is declared outside the visible part of
   the struct -- confirm that it is the first member and holds the name.
   The "generic" entry supplies no generators.  Under the reading above,
   "__ea" converts to the generic space with gen_from_ea and back with
   gen_to_ea.  */
140 static struct spu_address_space spu_address_spaces
[] = {
141 {"generic", NULL
, NULL
},
142 {"__ea", gen_from_ea
, gen_to_ea
},
/* One pair of long long values per builtin immediate type, in the order
   given by the trailing comments (SPU_BTI_7 first, SPU_BTI_U18 last).
   NOTE(review): the members of struct spu_builtin_range are not visible
   here; each row reads as {lowest, highest} accepted value -- confirm.
   The SPU_BTI_7 and SPU_BTI_16 rows cover the union of their signed (S)
   and unsigned (U) counterparts.  SPU_BTI_U16_2 and SPU_BTI_U18 carry
   identical values.  */
146 static struct spu_builtin_range spu_builtin_range
[] = {
147 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
148 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
149 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
150 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
151 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
152 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
153 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
154 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
155 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
156 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
157 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
158 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
162 /* Target specific attribute specifications. */
163 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
165 /* Prototypes and external defs. */
166 static void spu_init_builtins (void);
167 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode
);
168 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode
);
169 static rtx
adjust_operand (rtx op
, HOST_WIDE_INT
* start
);
170 static rtx
get_pic_reg (void);
171 static int need_to_save_reg (int regno
, int saving
);
172 static rtx
frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
);
173 static rtx
frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
);
174 static rtx
frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
,
176 static void emit_nop_for_insn (rtx insn
);
177 static bool insn_clobbers_hbr (rtx insn
);
178 static void spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
179 int distance
, sbitmap blocks
);
180 static rtx
spu_emit_vector_compare (enum rtx_code rcode
, rtx op0
, rtx op1
,
181 enum machine_mode dmode
);
182 static rtx
get_branch_target (rtx branch
);
183 static void spu_machine_dependent_reorg (void);
184 static int spu_sched_issue_rate (void);
185 static int spu_sched_variable_issue (FILE * dump
, int verbose
, rtx insn
,
187 static int get_pipe (rtx insn
);
188 static int spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
);
189 static void spu_sched_init_global (FILE *, int, int);
190 static void spu_sched_init (FILE *, int, int);
191 static int spu_sched_reorder (FILE *, int, rtx
*, int *, int);
192 static tree
spu_handle_fndecl_attribute (tree
* node
, tree name
, tree args
,
194 unsigned char *no_add_attrs
);
195 static tree
spu_handle_vector_attribute (tree
* node
, tree name
, tree args
,
197 unsigned char *no_add_attrs
);
198 static int spu_naked_function_p (tree func
);
199 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
200 const_tree type
, unsigned char named
);
201 static tree
spu_build_builtin_va_list (void);
202 static void spu_va_start (tree
, rtx
);
203 static tree
spu_gimplify_va_arg_expr (tree valist
, tree type
, tree
* pre_p
,
205 static int store_with_one_insn_p (rtx mem
);
206 static int mem_is_padded_component_ref (rtx x
);
207 static int reg_aligned_for_addr (rtx x
, int aligned
);
208 static bool spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
);
209 static void spu_asm_globalize_label (FILE * file
, const char *name
);
210 static unsigned char spu_rtx_costs (rtx x
, int code
, int outer_code
,
212 static unsigned char spu_function_ok_for_sibcall (tree decl
, tree exp
);
213 static void spu_init_libfuncs (void);
214 static bool spu_return_in_memory (const_tree type
, const_tree fntype
);
215 static void fix_range (const char *);
216 static void spu_encode_section_info (tree
, rtx
, int);
217 static tree
spu_builtin_mul_widen_even (tree
);
218 static tree
spu_builtin_mul_widen_odd (tree
);
219 static tree
spu_builtin_mask_for_load (void);
220 static int spu_builtin_vectorization_cost (bool);
221 static bool spu_vector_alignment_reachable (const_tree
, bool);
222 static int spu_sms_res_mii (struct ddg
*g
);
223 static void asm_file_start (void);
224 static unsigned int spu_section_type_flags (tree
, const char *, int);
226 extern const char *reg_names
[];
227 rtx spu_compare_op0
, spu_compare_op1
, spu_expect_op0
, spu_expect_op1
;
229 /* Which instruction set architecture to use. */
231 /* Which cpu are we tuning for. */
234 /* The hardware requires 8 insns between a hint and the branch it
235 affects. This variable describes how many rtl instructions the
236 compiler needs to see before inserting a hint, and then the compiler
237 will insert enough nops to make it at least 8 insns. The default is
238 for the compiler to allow up to 2 nops to be emitted. The nops are
239 inserted in pairs, so we round down. */
240 int spu_hint_dist
= (8*4) - (2*4);
242 /* Determines whether we run variable tracking in machine dependent
244 static int spu_flag_var_tracking
;
259 IC_POOL
, /* constant pool */
260 IC_IL1
, /* one il* instruction */
261 IC_IL2
, /* both ilhu and iohl instructions */
262 IC_IL1s
, /* one il* instruction */
263 IC_IL2s
, /* both ilhu and iohl instructions */
264 IC_FSMBI
, /* the fsmbi instruction */
265 IC_CPAT
, /* one of the c*d instructions */
266 IC_FSMBI2
/* fsmbi plus 1 other instruction */
269 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
270 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
271 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
272 static enum immediate_class
classify_immediate (rtx op
,
273 enum machine_mode mode
);
275 static enum machine_mode
spu_unwind_word_mode (void);
277 static enum machine_mode
278 spu_libgcc_cmp_return_mode (void);
280 static enum machine_mode
281 spu_libgcc_shift_count_mode (void);
284 /* TARGET overrides. */
286 static enum machine_mode
spu_ea_pointer_mode (int);
287 #undef TARGET_ADDR_SPACE_POINTER_MODE
288 #define TARGET_ADDR_SPACE_POINTER_MODE spu_ea_pointer_mode
290 static const char *spu_addr_space_name (int);
291 #undef TARGET_ADDR_SPACE_NAME
292 #define TARGET_ADDR_SPACE_NAME spu_addr_space_name
294 static unsigned char spu_addr_space_number (const tree
);
295 #undef TARGET_ADDR_SPACE_NUMBER
296 #define TARGET_ADDR_SPACE_NUMBER spu_addr_space_number
298 static rtx (* spu_addr_space_conversion_rtl (int, int)) (rtx
, rtx
);
299 #undef TARGET_ADDR_SPACE_CONVERSION_RTL
300 #define TARGET_ADDR_SPACE_CONVERSION_RTL spu_addr_space_conversion_rtl
302 static bool spu_valid_pointer_mode (enum machine_mode mode
);
303 #undef TARGET_VALID_POINTER_MODE
304 #define TARGET_VALID_POINTER_MODE spu_valid_pointer_mode
306 static bool spu_valid_addr_space (const_tree
);
307 #undef TARGET_VALID_ADDR_SPACE
308 #define TARGET_VALID_ADDR_SPACE spu_valid_addr_space
310 #undef TARGET_INIT_BUILTINS
311 #define TARGET_INIT_BUILTINS spu_init_builtins
313 #undef TARGET_EXPAND_BUILTIN
314 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
316 #undef TARGET_UNWIND_WORD_MODE
317 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
319 #undef TARGET_ASM_ALIGNED_DI_OP
320 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
322 #undef TARGET_RTX_COSTS
323 #define TARGET_RTX_COSTS spu_rtx_costs
325 #undef TARGET_ADDRESS_COST
326 #define TARGET_ADDRESS_COST hook_int_rtx_0
328 #undef TARGET_SCHED_ISSUE_RATE
329 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
331 #undef TARGET_SCHED_INIT_GLOBAL
332 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
334 #undef TARGET_SCHED_INIT
335 #define TARGET_SCHED_INIT spu_sched_init
337 #undef TARGET_SCHED_VARIABLE_ISSUE
338 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
340 #undef TARGET_SCHED_REORDER
341 #define TARGET_SCHED_REORDER spu_sched_reorder
343 #undef TARGET_SCHED_REORDER2
344 #define TARGET_SCHED_REORDER2 spu_sched_reorder
346 #undef TARGET_SCHED_ADJUST_COST
347 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
349 const struct attribute_spec spu_attribute_table
[];
350 #undef TARGET_ATTRIBUTE_TABLE
351 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
353 #undef TARGET_ASM_INTEGER
354 #define TARGET_ASM_INTEGER spu_assemble_integer
356 #undef TARGET_SCALAR_MODE_SUPPORTED_P
357 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
359 #undef TARGET_VECTOR_MODE_SUPPORTED_P
360 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
362 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
363 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
365 #undef TARGET_ASM_GLOBALIZE_LABEL
366 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
368 #undef TARGET_PASS_BY_REFERENCE
369 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
371 #undef TARGET_MUST_PASS_IN_STACK
372 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
374 #undef TARGET_BUILD_BUILTIN_VA_LIST
375 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
377 #undef TARGET_EXPAND_BUILTIN_VA_START
378 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
380 #undef TARGET_SETUP_INCOMING_VARARGS
381 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
383 #undef TARGET_MACHINE_DEPENDENT_REORG
384 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
386 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
387 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
389 #undef TARGET_DEFAULT_TARGET_FLAGS
390 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
392 #undef TARGET_INIT_LIBFUNCS
393 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
395 #undef TARGET_RETURN_IN_MEMORY
396 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
398 #undef TARGET_ENCODE_SECTION_INFO
399 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
401 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
402 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
404 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
405 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
407 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
408 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
410 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
411 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
413 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
414 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
416 #undef TARGET_LIBGCC_CMP_RETURN_MODE
417 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
419 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
420 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
422 #undef TARGET_SCHED_SMS_RES_MII
423 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
425 #undef TARGET_ASM_FILE_START
426 #define TARGET_ASM_FILE_START asm_file_start
428 #undef TARGET_SECTION_TYPE_FLAGS
429 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
431 struct gcc_target targetm
= TARGET_INITIALIZER
;
434 spu_optimization_options (int level ATTRIBUTE_UNUSED
, int size ATTRIBUTE_UNUSED
)
436 /* Override some of the default param values. With so many registers
437 larger values are better for these params. */
438 MAX_PENDING_LIST_LENGTH
= 128;
440 /* With so many registers this is better on by default. */
441 flag_rename_registers
= 1;
444 /* Sometimes certain combinations of command options do not make sense
445 on a particular target machine. You can define a macro
446 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
447 executed once just after all the command options have been parsed. */
449 spu_override_options (void)
451 /* Small loops will be completely peeled at -O3. For SPU it is more
452 important to keep code small by default. */
453 if (!flag_unroll_loops
&& !flag_peel_loops
454 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
455 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
) = 1;
457 flag_omit_frame_pointer
= 1;
459 /* Functions must be 8 byte aligned so we correctly handle dual issue */
460 if (align_functions
< 8)
463 spu_hint_dist
= 8*4 - spu_max_nops
*4;
464 if (spu_hint_dist
< 0)
467 if (spu_fixed_range_string
)
468 fix_range (spu_fixed_range_string
);
470 /* Determine processor architectural level. */
473 if (strcmp (&spu_arch_string
[0], "cell") == 0)
474 spu_arch
= PROCESSOR_CELL
;
475 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
476 spu_arch
= PROCESSOR_CELLEDP
;
478 error ("Unknown architecture '%s'", &spu_arch_string
[0]);
481 /* Determine processor to tune for. */
484 if (strcmp (&spu_tune_string
[0], "cell") == 0)
485 spu_tune
= PROCESSOR_CELL
;
486 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
487 spu_tune
= PROCESSOR_CELLEDP
;
489 error ("Unknown architecture '%s'", &spu_tune_string
[0]);
492 /* Change defaults according to the processor architecture. */
493 if (spu_arch
== PROCESSOR_CELLEDP
)
495 /* If no command line option has been otherwise specified, change
496 the default to -mno-safe-hints on celledp -- only the original
497 Cell/B.E. processors require this workaround. */
498 if (!(target_flags_explicit
& MASK_SAFE_HINTS
))
499 target_flags
&= ~MASK_SAFE_HINTS
;
502 REAL_MODE_FORMAT (SFmode
) = &spu_single_format
;
505 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
506 struct attribute_spec.handler. */
508 /* Table of machine attributes. */
509 const struct attribute_spec spu_attribute_table
[] =
511 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
512 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
},
513 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
},
514 { NULL
, 0, 0, false, false, false, NULL
}
517 /* True if MODE is valid for the target. By "valid", we mean able to
518 be manipulated in non-trivial ways. In particular, this means all
519 the arithmetic is supported. */
521 spu_scalar_mode_supported_p (enum machine_mode mode
)
539 /* Similarly for vector modes. "Supported" here is less strict. At
540 least some operations are supported; need to check optabs or builtins
541 for further details. */
543 spu_vector_mode_supported_p (enum machine_mode mode
)
560 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
561 least significant bytes of the outer mode. This function returns
562 TRUE for the SUBREG's where this is correct. */
564 valid_subreg (rtx op
)
/* om is the mode of OP itself; im is the mode of the expression that
   OP wraps (its SUBREG_REG).  */
566 enum machine_mode om
= GET_MODE (op
);
567 enum machine_mode im
= GET_MODE (SUBREG_REG (op
));
/* Both modes must be known (not VOIDmode).  Beyond that, the SUBREG is
   accepted when the two GET_MODE_SIZE values are equal, when both are
   <= 4, or when both are >= 16.  Any other combination is rejected.  */
568 return om
!= VOIDmode
&& im
!= VOIDmode
569 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
570 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4)
571 || (GET_MODE_SIZE (im
) >= 16 && GET_MODE_SIZE (om
) >= 16));
574 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
575 and adjust the start offset. */
577 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
579 enum machine_mode mode
;
581 /* Strip any paradoxical SUBREG. */
582 if (GET_CODE (op
) == SUBREG
583 && (GET_MODE_BITSIZE (GET_MODE (op
))
584 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)))))
588 GET_MODE_BITSIZE (GET_MODE (op
)) -
589 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
590 op
= SUBREG_REG (op
);
592 /* If it is smaller than SI, assure a SUBREG */
593 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
597 *start
+= 32 - op_size
;
600 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
601 mode
= mode_for_size (op_size
, MODE_INT
, 0);
602 if (mode
!= GET_MODE (op
))
603 op
= gen_rtx_SUBREG (mode
, op
, 0);
608 spu_expand_extv (rtx ops
[], int unsignedp
)
610 HOST_WIDE_INT width
= INTVAL (ops
[2]);
611 HOST_WIDE_INT start
= INTVAL (ops
[3]);
612 HOST_WIDE_INT src_size
, dst_size
;
613 enum machine_mode src_mode
, dst_mode
;
614 rtx dst
= ops
[0], src
= ops
[1];
617 dst
= adjust_operand (ops
[0], 0);
618 dst_mode
= GET_MODE (dst
);
619 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
621 src
= adjust_operand (src
, &start
);
622 src_mode
= GET_MODE (src
);
623 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
627 s
= gen_reg_rtx (src_mode
);
631 emit_insn (gen_ashlsi3 (s
, src
, GEN_INT (start
)));
634 emit_insn (gen_ashldi3 (s
, src
, GEN_INT (start
)));
637 emit_insn (gen_ashlti3 (s
, src
, GEN_INT (start
)));
645 if (width
< src_size
)
652 icode
= unsignedp
? CODE_FOR_lshrsi3
: CODE_FOR_ashrsi3
;
655 icode
= unsignedp
? CODE_FOR_lshrdi3
: CODE_FOR_ashrdi3
;
658 icode
= unsignedp
? CODE_FOR_lshrti3
: CODE_FOR_ashrti3
;
663 s
= gen_reg_rtx (src_mode
);
664 pat
= GEN_FCN (icode
) (s
, src
, GEN_INT (src_size
- width
));
669 convert_move (dst
, src
, unsignedp
);
673 spu_expand_insv (rtx ops
[])
675 HOST_WIDE_INT width
= INTVAL (ops
[1]);
676 HOST_WIDE_INT start
= INTVAL (ops
[2]);
677 HOST_WIDE_INT maskbits
;
678 enum machine_mode dst_mode
, src_mode
;
679 rtx dst
= ops
[0], src
= ops
[3];
680 int dst_size
, src_size
;
686 if (GET_CODE (ops
[0]) == MEM
)
687 dst
= gen_reg_rtx (TImode
);
689 dst
= adjust_operand (dst
, &start
);
690 dst_mode
= GET_MODE (dst
);
691 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
693 if (CONSTANT_P (src
))
695 enum machine_mode m
=
696 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
697 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
699 src
= adjust_operand (src
, 0);
700 src_mode
= GET_MODE (src
);
701 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
703 mask
= gen_reg_rtx (dst_mode
);
704 shift_reg
= gen_reg_rtx (dst_mode
);
705 shift
= dst_size
- start
- width
;
707 /* It's not safe to use subreg here because the compiler assumes
708 that the SUBREG_REG is right justified in the SUBREG. */
709 convert_move (shift_reg
, src
, 1);
716 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
719 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
722 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
734 maskbits
= (-1ll << (32 - width
- start
));
736 maskbits
+= (1ll << (32 - start
));
737 emit_move_insn (mask
, GEN_INT (maskbits
));
740 maskbits
= (-1ll << (64 - width
- start
));
742 maskbits
+= (1ll << (64 - start
));
743 emit_move_insn (mask
, GEN_INT (maskbits
));
747 unsigned char arr
[16];
749 memset (arr
, 0, sizeof (arr
));
750 arr
[i
] = 0xff >> (start
& 7);
751 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
753 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
754 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
760 if (GET_CODE (ops
[0]) == MEM
)
762 rtx aligned
= gen_reg_rtx (SImode
);
763 rtx low
= gen_reg_rtx (SImode
);
764 rtx addr
= gen_reg_rtx (SImode
);
765 rtx rotl
= gen_reg_rtx (SImode
);
766 rtx mask0
= gen_reg_rtx (TImode
);
769 emit_move_insn (addr
, XEXP (ops
[0], 0));
770 emit_insn (gen_andsi3 (aligned
, addr
, GEN_INT (-16)));
771 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
772 emit_insn (gen_negsi2 (rotl
, low
));
773 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
774 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
775 mem
= change_address (ops
[0], TImode
, aligned
);
776 set_mem_alias_set (mem
, 0);
777 emit_move_insn (dst
, mem
);
778 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
779 emit_move_insn (mem
, dst
);
780 if (start
+ width
> MEM_ALIGN (ops
[0]))
782 rtx shl
= gen_reg_rtx (SImode
);
783 rtx mask1
= gen_reg_rtx (TImode
);
784 rtx dst1
= gen_reg_rtx (TImode
);
786 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
787 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
788 mem1
= adjust_address (mem
, TImode
, 16);
789 set_mem_alias_set (mem1
, 0);
790 emit_move_insn (dst1
, mem1
);
791 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
792 emit_move_insn (mem1
, dst1
);
796 emit_insn (gen_selb (dst
, copy_rtx (dst
), shift_reg
, mask
));
801 spu_expand_block_move (rtx ops
[])
803 HOST_WIDE_INT bytes
, align
, offset
;
804 rtx src
, dst
, sreg
, dreg
, target
;
806 if (GET_CODE (ops
[2]) != CONST_INT
807 || GET_CODE (ops
[3]) != CONST_INT
808 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO
* 8))
811 bytes
= INTVAL (ops
[2]);
812 align
= INTVAL (ops
[3]);
822 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
824 dst
= adjust_address (ops
[0], V16QImode
, offset
);
825 src
= adjust_address (ops
[1], V16QImode
, offset
);
826 emit_move_insn (dst
, src
);
831 unsigned char arr
[16] = { 0 };
832 for (i
= 0; i
< bytes
- offset
; i
++)
834 dst
= adjust_address (ops
[0], V16QImode
, offset
);
835 src
= adjust_address (ops
[1], V16QImode
, offset
);
836 mask
= gen_reg_rtx (V16QImode
);
837 sreg
= gen_reg_rtx (V16QImode
);
838 dreg
= gen_reg_rtx (V16QImode
);
839 target
= gen_reg_rtx (V16QImode
);
840 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
841 emit_move_insn (dreg
, dst
);
842 emit_move_insn (sreg
, src
);
843 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
844 emit_move_insn (dst
, target
);
852 { SPU_EQ
, SPU_GT
, SPU_GTU
};
/* Compare insn codes, looked up as spu_comp_icode[row][comparison].
   The columns follow the enumerators SPU_EQ, SPU_GT, SPU_GTU listed just
   above, and hold the ceq, cgt and clgt patterns respectively.
   The twelve rows are, in order: qi, hi, si, di, ti, sf, df, v16qi, v8hi,
   v4si, v4sf, v2df.
   The sf, df, v4sf and v2df rows have 0 in the SPU_GTU column.
   spu_emit_branch_or_set checks for a 0 entry before using the table
   value as an insn code.  */
854 int spu_comp_icode
[12][3] = {
855 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
856 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
857 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
858 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
859 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
860 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
861 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
862 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
863 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
864 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
865 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
866 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
869 /* Generate a compare for CODE. Return a brand-new rtx that represents
870 the result of the compare. GCC can figure this out too if we don't
871 provide all variations of compares, but GCC always wants to use
872 WORD_MODE, we can generate better code in most cases if we do it
875 spu_emit_branch_or_set (int is_set
, enum rtx_code code
, rtx operands
[])
877 int reverse_compare
= 0;
878 int reverse_test
= 0;
879 rtx compare_result
, eq_result
;
880 rtx comp_rtx
, eq_rtx
;
881 rtx target
= operands
[0];
882 enum machine_mode comp_mode
;
883 enum machine_mode op_mode
;
884 enum spu_comp_code scode
, eq_code
, ior_code
;
888 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
889 and so on, to keep the constant in operand 1. */
890 if (GET_CODE (spu_compare_op1
) == CONST_INT
)
892 HOST_WIDE_INT val
= INTVAL (spu_compare_op1
) - 1;
893 if (trunc_int_for_mode (val
, GET_MODE (spu_compare_op0
)) == val
)
897 spu_compare_op1
= GEN_INT (val
);
901 spu_compare_op1
= GEN_INT (val
);
905 spu_compare_op1
= GEN_INT (val
);
909 spu_compare_op1
= GEN_INT (val
);
918 op_mode
= GET_MODE (spu_compare_op0
);
924 if (HONOR_NANS (op_mode
))
939 if (HONOR_NANS (op_mode
))
1019 comp_mode
= op_mode
;
1023 comp_mode
= op_mode
;
1027 comp_mode
= op_mode
;
1031 comp_mode
= V4SImode
;
1035 comp_mode
= V2DImode
;
1042 if (GET_MODE (spu_compare_op1
) == DFmode
1043 && (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
1046 if (is_set
== 0 && spu_compare_op1
== const0_rtx
1047 && (GET_MODE (spu_compare_op0
) == SImode
1048 || GET_MODE (spu_compare_op0
) == HImode
) && scode
== SPU_EQ
)
1050 /* Don't need to set a register with the result when we are
1051 comparing against zero and branching. */
1052 reverse_test
= !reverse_test
;
1053 compare_result
= spu_compare_op0
;
1057 compare_result
= gen_reg_rtx (comp_mode
);
1059 if (reverse_compare
)
1061 rtx t
= spu_compare_op1
;
1062 spu_compare_op1
= spu_compare_op0
;
1063 spu_compare_op0
= t
;
1066 if (spu_comp_icode
[index
][scode
] == 0)
1069 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
1070 (spu_compare_op0
, op_mode
))
1071 spu_compare_op0
= force_reg (op_mode
, spu_compare_op0
);
1072 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
1073 (spu_compare_op1
, op_mode
))
1074 spu_compare_op1
= force_reg (op_mode
, spu_compare_op1
);
1075 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
1080 emit_insn (comp_rtx
);
1084 eq_result
= gen_reg_rtx (comp_mode
);
1085 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
1091 ior_code
= ior_optab
->handlers
[(int)comp_mode
].insn_code
;
1092 gcc_assert (ior_code
!= CODE_FOR_nothing
);
1093 emit_insn (GEN_FCN (ior_code
)
1094 (compare_result
, compare_result
, eq_result
));
1104 /* We don't have branch on QI compare insns, so we convert the
1105 QI compare result to a HI result. */
1106 if (comp_mode
== QImode
)
1108 rtx old_res
= compare_result
;
1109 compare_result
= gen_reg_rtx (HImode
);
1111 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
1115 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
1117 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
1119 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, target
);
1120 jump_pat
= gen_rtx_SET (VOIDmode
, pc_rtx
,
1121 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
1124 if (flag_schedule_insns_after_reload
&& TARGET_BRANCH_HINTS
1125 && spu_expect_op0
&& comp_mode
== Pmode
1126 && spu_expect_op0
== spu_compare_op0
)
1128 rtx then_reg
= gen_reg_rtx (Pmode
);
1129 rtx else_reg
= gen_reg_rtx (Pmode
);
1130 rtx expect_cmp
= gen_reg_rtx (Pmode
);
1131 rtx hint_target
= gen_reg_rtx (Pmode
);
1132 rtx branch_label
= gen_label_rtx ();
1133 rtx branch_ref
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
1134 rtx then_label
= gen_label_rtx ();
1135 rtx then_ref
= gen_rtx_LABEL_REF (VOIDmode
, then_label
);
1136 rtx else_label
= gen_label_rtx ();
1137 rtx else_ref
= gen_rtx_LABEL_REF (VOIDmode
, else_label
);
1140 emit_move_insn (then_reg
, then_ref
);
1141 emit_move_insn (else_reg
, else_ref
);
1142 emit_insn (gen_clgt_si (expect_cmp
, spu_expect_op1
, const0_rtx
));
1143 emit_insn (gen_selb (hint_target
, then_reg
, else_reg
, expect_cmp
));
1144 emit_insn (gen_hbr (branch_ref
, hint_target
));
1146 LABEL_NUSES (branch_label
)++;
1147 LABEL_PRESERVE_P (branch_label
) = 1;
1148 LABEL_NUSES (then_label
)++;
1149 LABEL_PRESERVE_P (then_label
) = 1;
1150 LABEL_NUSES (else_label
)++;
1151 LABEL_PRESERVE_P (else_label
) = 1;
1153 /* We delete the labels to make sure they don't get used for
1154 anything else. The machine reorg phase will move them to
1155 the correct place. We don't try to reuse existing labels
1156 because we move these around later. */
1157 delete_insn (emit_label (branch_label
));
1158 delete_insn (emit_label (then_label
));
1159 delete_insn (emit_label (else_label
));
1161 v
= rtvec_alloc (5);
1162 RTVEC_ELT (v
, 0) = jump_pat
;
1163 RTVEC_ELT (v
, 1) = gen_rtx_USE (VOIDmode
, branch_ref
);
1164 RTVEC_ELT (v
, 2) = gen_rtx_USE (VOIDmode
, then_ref
);
1165 RTVEC_ELT (v
, 3) = gen_rtx_USE (VOIDmode
, else_ref
);
1166 RTVEC_ELT (v
, 4) = gen_rtx_CLOBBER (VOIDmode
,
1167 gen_rtx_REG (SImode
,
1169 jump_pat
= gen_rtx_PARALLEL (VOIDmode
, v
);
1172 emit_jump_insn (jump_pat
);
1174 else if (is_set
== 2)
1176 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
1177 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
1178 enum machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
1180 rtx op_t
= operands
[2];
1181 rtx op_f
= operands
[3];
1183 /* The result of the comparison can be SI, HI or QI mode. Create a
1184 mask based on that result. */
1185 if (target_size
> compare_size
)
1187 select_mask
= gen_reg_rtx (mode
);
1188 emit_insn (gen_extend_compare (select_mask
, compare_result
));
1190 else if (target_size
< compare_size
)
1192 gen_rtx_SUBREG (mode
, compare_result
,
1193 (compare_size
- target_size
) / BITS_PER_UNIT
);
1194 else if (comp_mode
!= mode
)
1195 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
1197 select_mask
= compare_result
;
1199 if (GET_MODE (target
) != GET_MODE (op_t
)
1200 || GET_MODE (target
) != GET_MODE (op_f
))
1204 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
1206 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
1211 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
1212 gen_rtx_NOT (comp_mode
, compare_result
)));
1213 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
1214 emit_insn (gen_extendhisi2 (target
, compare_result
));
1215 else if (GET_MODE (target
) == SImode
1216 && GET_MODE (compare_result
) == QImode
)
1217 emit_insn (gen_extend_compare (target
, compare_result
));
1219 emit_move_insn (target
, compare_result
);
1221 spu_expect_op0
= spu_expect_op1
= 0;
1225 const_double_to_hwint (rtx x
)
1229 if (GET_MODE (x
) == SFmode
)
1231 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1232 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1234 else if (GET_MODE (x
) == DFmode
)
1237 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1238 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
1240 val
= (val
<< 32) | (l
[1] & 0xffffffff);
1248 hwint_to_const_double (enum machine_mode mode
, HOST_WIDE_INT v
)
1252 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
1255 tv
[0] = (v
<< 32) >> 32;
1256 else if (mode
== DFmode
)
1258 tv
[1] = (v
<< 32) >> 32;
1261 real_from_target (&rv
, tv
, mode
);
1262 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
1266 print_operand_address (FILE * file
, register rtx addr
)
1271 if (GET_CODE (addr
) == AND
1272 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1273 && INTVAL (XEXP (addr
, 1)) == -16)
1274 addr
= XEXP (addr
, 0);
1276 switch (GET_CODE (addr
))
1279 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
1283 reg
= XEXP (addr
, 0);
1284 offset
= XEXP (addr
, 1);
1285 if (GET_CODE (offset
) == REG
)
1287 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1288 reg_names
[REGNO (offset
)]);
1290 else if (GET_CODE (offset
) == CONST_INT
)
1292 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1293 INTVAL (offset
), reg_names
[REGNO (reg
)]);
1303 output_addr_const (file
, addr
);
/* Print operand X to FILE according to the modifier letter CODE.
   MODE starts as GET_MODE (X); when X has VOIDmode the first group of
   case labels is consulted on CODE (the statements they select are not
   visible here).  The later groups work from the 16-byte image that
   constant_to_array stores in ARR and print either an instruction
   suffix ("a", "b", "h", "hu", "w", "d", ...) or a decimal immediate.
   Other visible letters: 'M' (suffix for c*d), 'N' (negated constant),
   'I' ("d"/"e" for interrupts), 'b' (branch modifiers), 'i' (indirect
   call) and 'p' (load/store form).  The final visible statement reports
   "invalid %%xn code" through output_operand_lossage.
   NOTE(review): many case labels, breaks and else branches are missing
   from this listing; confirm the exact letter-to-output mapping against
   the complete source before relying on this summary.  */
1313 print_operand (FILE * file
, rtx x
, int code
)
1315 enum machine_mode mode
= GET_MODE (x
);
1317 unsigned char arr
[16];
1318 int xcode
= GET_CODE (x
);
1320 if (GET_MODE (x
) == VOIDmode
)
1323 case 'L': /* 128 bits, signed */
1324 case 'm': /* 128 bits, signed */
1325 case 'T': /* 128 bits, signed */
1326 case 't': /* 128 bits, signed */
1329 case 'K': /* 64 bits, signed */
1330 case 'k': /* 64 bits, signed */
1331 case 'D': /* 64 bits, signed */
1332 case 'd': /* 64 bits, signed */
1335 case 'J': /* 32 bits, signed */
1336 case 'j': /* 32 bits, signed */
1337 case 's': /* 32 bits, signed */
1338 case 'S': /* 32 bits, signed */
1345 case 'j': /* 32 bits, signed */
1346 case 'k': /* 64 bits, signed */
1347 case 'm': /* 128 bits, signed */
1348 if (xcode
== CONST_INT
1349 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1351 gcc_assert (logical_immediate_p (x
, mode
));
1352 constant_to_array (mode
, x
, arr
);
1353 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1354 val
= trunc_int_for_mode (val
, SImode
);
1355 switch (which_logical_immediate (val
))
1360 fprintf (file
, "h");
1363 fprintf (file
, "b");
1373 case 'J': /* 32 bits, signed */
1374 case 'K': /* 64 bits, signed */
1375 case 'L': /* 128 bits, signed */
1376 if (xcode
== CONST_INT
1377 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1379 gcc_assert (logical_immediate_p (x
, mode
)
1380 || iohl_immediate_p (x
, mode
));
1381 constant_to_array (mode
, x
, arr
);
1382 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1383 val
= trunc_int_for_mode (val
, SImode
);
1384 switch (which_logical_immediate (val
))
1390 val
= trunc_int_for_mode (val
, HImode
);
1393 val
= trunc_int_for_mode (val
, QImode
);
1398 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1404 case 't': /* 128 bits, signed */
1405 case 'd': /* 64 bits, signed */
1406 case 's': /* 32 bits, signed */
1409 enum immediate_class c
= classify_immediate (x
, mode
);
1413 constant_to_array (mode
, x
, arr
);
1414 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1415 val
= trunc_int_for_mode (val
, SImode
);
1416 switch (which_immediate_load (val
))
1421 fprintf (file
, "a");
1424 fprintf (file
, "h");
1427 fprintf (file
, "hu");
1434 constant_to_array (mode
, x
, arr
);
1435 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1437 fprintf (file
, "b");
1439 fprintf (file
, "h");
1441 fprintf (file
, "w");
1443 fprintf (file
, "d");
1446 if (xcode
== CONST_VECTOR
)
1448 x
= CONST_VECTOR_ELT (x
, 0);
1449 xcode
= GET_CODE (x
);
1451 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1452 fprintf (file
, "a");
1453 else if (xcode
== HIGH
)
1454 fprintf (file
, "hu");
1468 case 'T': /* 128 bits, signed */
1469 case 'D': /* 64 bits, signed */
1470 case 'S': /* 32 bits, signed */
1473 enum immediate_class c
= classify_immediate (x
, mode
);
1477 constant_to_array (mode
, x
, arr
);
1478 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1479 val
= trunc_int_for_mode (val
, SImode
);
1480 switch (which_immediate_load (val
))
1487 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1492 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1495 constant_to_array (mode
, x
, arr
);
1497 for (i
= 0; i
< 16; i
++)
1502 print_operand (file
, GEN_INT (val
), 0);
1505 constant_to_array (mode
, x
, arr
);
1506 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1507 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1512 if (GET_CODE (x
) == CONST_VECTOR
)
1513 x
= CONST_VECTOR_ELT (x
, 0);
1514 output_addr_const (file
, x
);
1516 fprintf (file
, "@h");
1530 if (xcode
== CONST_INT
)
1532 /* Only 4 least significant bits are relevant for generate
1533 control word instructions. */
1534 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1539 case 'M': /* print code for c*d */
1540 if (GET_CODE (x
) == CONST_INT
)
1544 fprintf (file
, "b");
1547 fprintf (file
, "h");
1550 fprintf (file
, "w");
1553 fprintf (file
, "d");
1562 case 'N': /* Negate the operand */
1563 if (xcode
== CONST_INT
)
1564 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1565 else if (xcode
== CONST_VECTOR
)
1566 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1567 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1570 case 'I': /* enable/disable interrupts */
1571 if (xcode
== CONST_INT
)
1572 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1575 case 'b': /* branch modifiers */
1577 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1578 else if (COMPARISON_P (x
))
1579 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1582 case 'i': /* indirect call */
1585 if (GET_CODE (XEXP (x
, 0)) == REG
)
1586 /* Used in indirect function calls. */
1587 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1589 output_address (XEXP (x
, 0));
1593 case 'p': /* load/store */
1597 xcode
= GET_CODE (x
);
1602 xcode
= GET_CODE (x
);
1605 fprintf (file
, "d");
1606 else if (xcode
== CONST_INT
)
1607 fprintf (file
, "a");
1608 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1609 fprintf (file
, "r");
1610 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1612 if (GET_CODE (XEXP (x
, 1)) == REG
)
1613 fprintf (file
, "x");
1615 fprintf (file
, "d");
1620 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1622 output_addr_const (file
, GEN_INT (val
));
1626 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1628 output_addr_const (file
, GEN_INT (val
));
1632 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1634 output_addr_const (file
, GEN_INT (val
));
1638 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1639 val
= (val
>> 3) & 0x1f;
1640 output_addr_const (file
, GEN_INT (val
));
1644 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1647 output_addr_const (file
, GEN_INT (val
));
1651 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1654 output_addr_const (file
, GEN_INT (val
));
1658 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1661 output_addr_const (file
, GEN_INT (val
));
1665 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1666 val
= -(val
& -8ll);
1667 val
= (val
>> 3) & 0x1f;
1668 output_addr_const (file
, GEN_INT (val
));
1673 constant_to_array (mode
, x
, arr
);
1674 val
= (((arr
[0] << 1) + (arr
[1] >> 7)) & 0xff) - 127;
1675 output_addr_const (file
, GEN_INT (code
== 'w' ? -val
: val
));
1680 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1681 else if (xcode
== MEM
)
1682 output_address (XEXP (x
, 0));
1683 else if (xcode
== CONST_VECTOR
)
1684 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1686 output_addr_const (file
, x
);
1693 output_operand_lossage ("invalid %%xn code");
/* Indexed by hard register number; need_to_save_reg below tests
   call_used_regs[regno] when deciding whether a register needs saving.  */
1698 extern char call_used_regs
[];
/* NOTE(review): the function header line is missing from this listing;
   this is presumably get_pic_reg, which later code in this file calls to
   obtain the register holding the PIC base.
   Visible logic: start from pic_offset_table_rtx; in a leaf function in
   which LAST_ARG_REGNUM was never live, use an SImode LAST_ARG_REGNUM
   register instead.  The action taken when neither reload_completed nor
   reload_in_progress is set is not visible here.  */
1700 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1701 caller saved register. For leaf functions it is more efficient to
1702 use a volatile register because we won't need to save and restore the
1703 pic register. This routine is only valid after register allocation
1704 is completed, so we can pick an unused register. */
1708 rtx pic_reg
= pic_offset_table_rtx
;
1709 if (!reload_completed
&& !reload_in_progress
)
1711 if (current_function_is_leaf
&& !df_regs_ever_live_p (LAST_ARG_REGNUM
))
1712 pic_reg
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
);
/* OPS[0] is the destination and OPS[1] the constant being moved; the
   constant's class C comes from classify_immediate (OPS[1], MODE) with
   MODE taken from OPS[0].  Visible strategies:
     - split each 32-bit word into a high-halfword constant moved into a
       temporary and a low-halfword constant combined with IOR;
     - build a byte mask (bytes that are nonzero become 0xff) and AND it
       with a constant that replicates one nonzero byte value;
     - during or after reload, load the constant from the constant pool
       (through an address register when TARGET_LARGE_MEM);
     - for symbolic values, emit HIGH/LO_SUM moves or gen_pic followed by
       adding the PIC register.
   The last visible return yields flag_pic || C == IC_IL2s.
   NOTE(review): the switch on C and its case labels are missing from this
   listing, so which class selects which strategy must be confirmed.  */
1716 /* Split constant addresses to handle cases that are too large.
1717 Add in the pic register when in PIC mode.
1718 Split immediates that require more than 1 instruction. */
1720 spu_split_immediate (rtx
* ops
)
1722 enum machine_mode mode
= GET_MODE (ops
[0]);
1723 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1729 unsigned char arrhi
[16];
1730 unsigned char arrlo
[16];
1731 rtx to
, temp
, hi
, lo
;
1733 enum machine_mode imode
= mode
;
1734 /* We need to do reals as ints because the constant used in the
1735 IOR might not be a legitimate real constant. */
1736 imode
= int_mode_for_mode (mode
);
1737 constant_to_array (mode
, ops
[1], arrhi
);
1739 to
= simplify_gen_subreg (imode
, ops
[0], mode
, 0);
1742 temp
= !can_create_pseudo_p () ? to
: gen_reg_rtx (imode
);
1743 for (i
= 0; i
< 16; i
+= 4)
1745 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1746 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1747 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1748 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1750 hi
= array_to_constant (imode
, arrhi
);
1751 lo
= array_to_constant (imode
, arrlo
);
1752 emit_move_insn (temp
, hi
);
1753 emit_insn (gen_rtx_SET
1754 (VOIDmode
, to
, gen_rtx_IOR (imode
, temp
, lo
)));
1759 unsigned char arr_fsmbi
[16];
1760 unsigned char arr_andbi
[16];
1761 rtx to
, reg_fsmbi
, reg_and
;
1763 enum machine_mode imode
= mode
;
1764 /* We need to do reals as ints because the constant used in the
1765 * AND might not be a legitimate real constant. */
1766 imode
= int_mode_for_mode (mode
);
1767 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1769 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1772 for (i
= 0; i
< 16; i
++)
1773 if (arr_fsmbi
[i
] != 0)
1775 arr_andbi
[0] = arr_fsmbi
[i
];
1776 arr_fsmbi
[i
] = 0xff;
1778 for (i
= 1; i
< 16; i
++)
1779 arr_andbi
[i
] = arr_andbi
[0];
1780 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1781 reg_and
= array_to_constant (imode
, arr_andbi
);
1782 emit_move_insn (to
, reg_fsmbi
);
1783 emit_insn (gen_rtx_SET
1784 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1788 if (reload_in_progress
|| reload_completed
)
1790 rtx mem
= force_const_mem (mode
, ops
[1]);
1791 if (TARGET_LARGE_MEM
)
1793 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1794 emit_move_insn (addr
, XEXP (mem
, 0));
1795 mem
= replace_equiv_address (mem
, addr
);
1797 emit_move_insn (ops
[0], mem
);
1803 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1807 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1808 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1811 emit_insn (gen_pic (ops
[0], ops
[1]));
1814 rtx pic_reg
= get_pic_reg ();
1815 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1816 current_function_uses_pic_offset_table
= 1;
1818 return flag_pic
|| c
== IC_IL2s
;
/* Decide whether hard register REGNO needs a save slot.  Visible tests:
   the register was ever live and is not in call_used_regs; or REGNO is
   PIC_OFFSET_TABLE_REGNUM, with (!SAVING or the function uses the PIC
   offset table) and (the function is not a leaf or LAST_ARG_REGNUM was
   ever live).
   NOTE(review): the opening of the second condition and the return
   statements are on lines missing from this listing.  */
1829 /* SAVING is TRUE when we are generating the actual load and store
1830 instructions for REGNO. When determining the size of the stack
1831 needed for saving register we must allocate enough space for the
1832 worst case, because we don't always have the information early enough
1833 to not allocate it. But we can at least eliminate the actual loads
1834 and stores during the prologue/epilogue. */
1836 need_to_save_reg (int regno
, int saving
)
1838 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1841 && regno
== PIC_OFFSET_TABLE_REGNUM
1842 && (!saving
|| current_function_uses_pic_offset_table
)
1844 || !current_function_is_leaf
|| df_regs_ever_live_p (LAST_ARG_REGNUM
)))
/* Return the number of bytes needed for the register save area: 0x10
   bytes for every hard register (scanned from FIRST_PSEUDO_REGISTER - 1
   down to 0) for which need_to_save_reg (regno, 0) is nonzero.  */
1849 /* This function is only correct starting with local register
1852 spu_saved_regs_size (void)
1854 int reg_save_size
= 0;
1857 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1858 if (need_to_save_reg (regno
, 0))
1859 reg_save_size
+= 0x10;
1860 return reg_save_size
;
/* Emit a V4SImode move that stores hard register REGNO into the frame
   memory at ADDR + OFFSET, and return the emitted insn.  */
1864 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1866 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1868 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1869 return emit_insn (gen_movv4si (mem
, reg
));
/* Emit a V4SImode move that loads hard register REGNO from the frame
   memory at ADDR + OFFSET, and return the emitted insn.  */
1873 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1875 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1877 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1878 return emit_insn (gen_movv4si (reg
, mem
));
/* Emit DST = SRC + IMM.  When IMM satisfies constraint K a single addsi3
   with the immediate operand is emitted; otherwise IMM is first moved
   into SCRATCH and an addsi3 of SRC and SCRATCH is emitted.  In the
   second path SRC and SCRATCH are compared by register number.
   NOTE(review): the consequence of that comparison and the return
   statement are on lines missing from this listing.  */
1881 /* This happens after reload, so we need to expand it. */
1883 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1886 if (satisfies_constraint_K (GEN_INT (imm
)))
1888 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1892 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1893 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1894 if (REGNO (src
) == REGNO (scratch
))
/* Visible test: after reload, the function has no static chain decl, the
   sum of spu_saved_regs_size (), the outgoing-args size and the
   pretend-args size is zero, and the function is a leaf.
   NOTE(review): one term of the sum and the return statements are on
   lines missing from this listing.  */
1900 /* Return nonzero if this function is known to have a null epilogue. */
1903 direct_return (void)
1905 if (reload_completed
)
1907 if (cfun
->static_chain_decl
== 0
1908 && (spu_saved_regs_size ()
1910 + current_function_outgoing_args_size
1911 + current_function_pretend_args_size
== 0)
1912 && current_function_is_leaf
)
1919 The stack frame looks like this:
1926 prev SP | back chain |
1929 | reg save | current_function_pretend_args_size bytes
1932 | saved regs | spu_saved_regs_size() bytes
1935 FP | vars | get_frame_size() bytes
1939 | args | current_function_outgoing_args_size bytes
/* Emit the RTL for the function prologue.  Visible steps, in order:
     1. Force use of the PIC offset table when flag_pic and not optimizing.
     2. Compute TOTAL_SIZE from the local frame size, the saved-register
        area, and the outgoing and pretend argument sizes; add
        STACK_POINTER_OFFSET when the function is not a leaf, calls
        alloca, or TOTAL_SIZE is already positive.
     3. For non-leaf functions, store the link register at $sp + 16.
     4. Store every register selected by need_to_save_reg (regno, 1),
        starting from offset -pretend_args_size.
     5. When PIC is in use, load the PIC register.
     6. When flag_stack_check is set, emit a vector compare of $sp against
        TOTAL_SIZE - 1 followed by a halt-if-equal insn.
     7. Adjust $sp by -TOTAL_SIZE and store the back chain (before the
        adjustment when TOTAL_SIZE <= 2000, afterwards otherwise).
     8. When a frame pointer is needed, set it to
        $sp + STACK_POINTER_OFFSET + outgoing_args_size.
   Frame-related insns are marked with RTX_FRAME_RELATED_P.
   NOTE(review): several lines (braces, else keywords, offset updates) are
   missing from this listing; confirm details against the full source.  */
1949 spu_expand_prologue (void)
1951 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1952 HOST_WIDE_INT total_size
;
1953 HOST_WIDE_INT saved_regs_size
;
1954 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1955 rtx scratch_reg_0
, scratch_reg_1
;
1958 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1959 the "toplevel" insn chain. */
1960 emit_note (NOTE_INSN_DELETED
);
1962 if (flag_pic
&& optimize
== 0)
1963 current_function_uses_pic_offset_table
= 1;
1965 if (spu_naked_function_p (current_function_decl
))
1968 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1969 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1971 saved_regs_size
= spu_saved_regs_size ();
1972 total_size
= size
+ saved_regs_size
1973 + current_function_outgoing_args_size
1974 + current_function_pretend_args_size
;
1976 if (!current_function_is_leaf
1977 || current_function_calls_alloca
|| total_size
> 0)
1978 total_size
+= STACK_POINTER_OFFSET
;
1980 /* Save this first because code after this might use the link
1981 register as a scratch register. */
1982 if (!current_function_is_leaf
)
1984 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1985 RTX_FRAME_RELATED_P (insn
) = 1;
1990 offset
= -current_function_pretend_args_size
;
1991 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1992 if (need_to_save_reg (regno
, 1))
1995 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1996 RTX_FRAME_RELATED_P (insn
) = 1;
2000 if (flag_pic
&& current_function_uses_pic_offset_table
)
2002 rtx pic_reg
= get_pic_reg ();
2003 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
2004 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
2009 if (flag_stack_check
)
2011 /* We compare against total_size-1 because
2012 ($sp >= total_size) <=> ($sp > total_size-1) */
2013 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
2014 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
2015 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
2016 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
2018 emit_move_insn (scratch_v4si
, size_v4si
);
2019 size_v4si
= scratch_v4si
;
2021 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
2022 emit_insn (gen_vec_extractv4si
2023 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
2024 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
2027 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2028 the value of the previous $sp because we save it as the back
2030 if (total_size
<= 2000)
2032 /* In this case we save the back chain first. */
2033 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
2035 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
2037 else if (satisfies_constraint_K (GEN_INT (-total_size
)))
2039 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
2041 emit_insn (gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
)));
2045 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
2047 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
2049 RTX_FRAME_RELATED_P (insn
) = 1;
2050 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
2052 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, real
, REG_NOTES (insn
));
2054 if (total_size
> 2000)
2056 /* Save the back chain ptr */
2057 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
2060 if (frame_pointer_needed
)
2062 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
2063 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
2064 + current_function_outgoing_args_size
;
2065 /* Set the new frame_pointer */
2066 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
2067 RTX_FRAME_RELATED_P (insn
) = 1;
2068 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
2070 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
2071 real
, REG_NOTES (insn
));
2072 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
2076 emit_note (NOTE_INSN_DELETED
);
/* Emit the RTL for the function epilogue.  TOTAL_SIZE is computed the
   same way as in the prologue.  Visible steps: restore $sp (by loading
   the back chain from 0($sp) in connection with alloca, or by adding
   TOTAL_SIZE); reload every register selected by
   need_to_save_reg (regno, 1) starting at offset -pretend_args_size;
   reload the link register from $sp + 16 in non-leaf functions; then emit
   a USE of the link register and a return jump followed by a barrier.
   NOTE(review): the use of SIBCALL_P and several guarding conditions are
   on lines missing from this listing.  */
2080 spu_expand_epilogue (bool sibcall_p
)
2082 int size
= get_frame_size (), offset
, regno
;
2083 HOST_WIDE_INT saved_regs_size
, total_size
;
2084 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
2085 rtx jump
, scratch_reg_0
;
2087 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2088 the "toplevel" insn chain. */
2089 emit_note (NOTE_INSN_DELETED
);
2091 if (spu_naked_function_p (current_function_decl
))
2094 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
2096 saved_regs_size
= spu_saved_regs_size ();
2097 total_size
= size
+ saved_regs_size
2098 + current_function_outgoing_args_size
2099 + current_function_pretend_args_size
;
2101 if (!current_function_is_leaf
2102 || current_function_calls_alloca
|| total_size
> 0)
2103 total_size
+= STACK_POINTER_OFFSET
;
2107 if (current_function_calls_alloca
)
2108 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
2110 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
2113 if (saved_regs_size
> 0)
2115 offset
= -current_function_pretend_args_size
;
2116 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
2117 if (need_to_save_reg (regno
, 1))
2120 frame_emit_load (regno
, sp_reg
, offset
);
2125 if (!current_function_is_leaf
)
2126 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
2130 emit_insn (gen_rtx_USE
2131 (VOIDmode
, gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
)));
2132 jump
= emit_jump_insn (gen__return ());
2133 emit_barrier_after (jump
);
2136 emit_note (NOTE_INSN_DELETED
);
/* Return the value the link register had on entry to the function, in
   Pmode, via get_hard_reg_initial_val.  FRAME is unused.
   NOTE(review): any check on COUNT is on lines missing from this
   listing.  */
2140 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
2144 /* This is inefficient because it ends up copying to a save-register
2145 which then gets saved even though $lr has already been saved. But
2146 it does generate better code for leaf functions and we don't need
2147 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2148 used for __builtin_return_address anyway, so maybe we don't care if
2149 it's inefficient. */
2150 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
/* MODE must be an integer, float, vector-integer or vector-float mode
   (asserted).  Scalar integers are built with immed_double_const and
   scalar floats with hwint_to_const_double.  For vectors the element
   constant is built the same way for the inner mode and then replicated
   into every slot of a CONST_VECTOR with GET_MODE_NUNITS (MODE)
   elements.  */
2154 /* Given VAL, generate a constant appropriate for MODE.
2155 If MODE is a vector mode, every element will be VAL.
2156 For TImode, VAL will be zero extended to 128 bits. */
2158 spu_const (enum machine_mode mode
, HOST_WIDE_INT val
)
2164 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
2165 || GET_MODE_CLASS (mode
) == MODE_FLOAT
2166 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
2167 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
2169 if (GET_MODE_CLASS (mode
) == MODE_INT
)
2170 return immed_double_const (val
, 0, mode
);
2172 /* val is the bit representation of the float */
2173 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
2174 return hwint_to_const_double (mode
, val
);
2176 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
2177 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
2179 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
2181 units
= GET_MODE_NUNITS (mode
);
2183 v
= rtvec_alloc (units
);
2185 for (i
= 0; i
< units
; ++i
)
2186 RTVEC_ELT (v
, i
) = inner
;
2188 return gen_rtx_CONST_VECTOR (mode
, v
);
/* Create a MODE vector constant from 4 ints.  A, B, C and D are each
   stored most-significant byte first into consecutive 4-byte groups of
   a 16-byte array (A in bytes 0-3, B in 4-7, C in 8-11, D in 12-15),
   which array_to_constant then converts to a constant of MODE.  */
2193 spu_const_from_ints(enum machine_mode mode
, int a
, int b
, int c
, int d
)
2195 unsigned char arr
[16];
2196 arr
[0] = (a
>> 24) & 0xff;
2197 arr
[1] = (a
>> 16) & 0xff;
2198 arr
[2] = (a
>> 8) & 0xff;
2199 arr
[3] = (a
>> 0) & 0xff;
2200 arr
[4] = (b
>> 24) & 0xff;
2201 arr
[5] = (b
>> 16) & 0xff;
2202 arr
[6] = (b
>> 8) & 0xff;
2203 arr
[7] = (b
>> 0) & 0xff;
2204 arr
[8] = (c
>> 24) & 0xff;
2205 arr
[9] = (c
>> 16) & 0xff;
2206 arr
[10] = (c
>> 8) & 0xff;
2207 arr
[11] = (c
>> 0) & 0xff;
2208 arr
[12] = (d
>> 24) & 0xff;
2209 arr
[13] = (d
>> 16) & 0xff;
2210 arr
[14] = (d
>> 8) & 0xff;
2211 arr
[15] = (d
>> 0) & 0xff;
2212 return array_to_constant(mode
, arr
);
2215 /* Branch hint support. */
2217 /* An array of these is used to propagate hints to predecessor blocks.
   NOTE(review): the struct header line is missing from this listing;
   the tag is presumably spu_bb_info, matching the pointer declared
   below. */
2220 rtx prop_jump
; /* propagated from another block */
2221 int bb_index
; /* the original block. */
/* Indexed by basic block index in spu_machine_dependent_reorg.  */
2223 static struct spu_bb_info
*spu_bb_info
;
/* Nonzero when INSN is a CALL_INSN or matches the divmodsi4 or
   udivmodsi4 pattern.  */
2225 #define STOP_HINT_P(INSN) \
2226 (GET_CODE(INSN) == CALL_INSN \
2227 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2228 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2230 /* 1 when RTX is a hinted branch or its target. We keep track of
2231 what has been hinted so the safe-hint code can test it easily.
   The flag is kept in the `unchanging' bit of a CODE_LABEL, JUMP_INSN
   or CALL_INSN. */
2232 #define HINTED_P(RTX) \
2233 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2235 /* 1 when RTX is an insn that must be scheduled on an even boundary.
   The flag is kept in the `in_struct' bit of a JUMP_INSN or CALL_INSN. */
2236 #define SCHED_ON_EVEN_P(RTX) \
2237 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
/* Summary of the visible logic, with P = get_pipe (INSN):
     - a call or jump marked SCHED_ON_EVEN_P gets an lnop emitted after it;
     - when P is 1 and INSN has TImode, a nopn is emitted before INSN, the
       new nop takes the TImode marker and INSN is reset to VOIDmode;
     - the remaining visible path emits an lnop after INSN.
   The new insn is then passed to recog_memoized.  */
2239 /* Emit a nop for INSN such that the two will dual issue. This assumes
2240 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2241 We check for TImode to handle a MULTI1 insn which has dual issued its
2242 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2245 emit_nop_for_insn (rtx insn
)
2249 p
= get_pipe (insn
);
2250 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2251 new_insn
= emit_insn_after (gen_lnop (), insn
);
2252 else if (p
== 1 && GET_MODE (insn
) == TImode
)
2254 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2255 PUT_MODE (new_insn
, TImode
);
2256 PUT_MODE (insn
, VOIDmode
);
2259 new_insn
= emit_insn_after (gen_lnop (), insn
);
2260 recog_memoized (new_insn
);
/* NOTE(review): the function header line is missing from this listing, so
   the function's name and signature are not visible here.
   Visible logic: shorten_branches sets up INSN_ADDRESSES, then the active
   insns are walked in order.  For an iprefetch or hbr insn whose address
   is 8 bytes past the previous such insn (and which lacks TImode), an
   lnop is inserted before it and the TImode marker is moved.  A blockage
   insn passes its TImode marker to the following insn.  For an insn that
   must sit on an even boundary, or a TImode insn that starts a dual-issue
   pair, emit_nop_for_insn is called on the previous insn when
   (addr + length) is not a multiple of 8.  */
2263 /* Insert nops in basic blocks to meet dual issue alignment
2264 requirements. Also make sure hbrp and hint instructions are at least
2265 one cycle apart, possibly inserting a nop. */
2269 rtx insn
, next_insn
, prev_insn
, hbr_insn
= 0;
2273 /* This sets up INSN_ADDRESSES. */
2274 shorten_branches (get_insns ());
2276 /* Keep track of length added by nops. */
2280 insn
= get_insns ();
2281 if (!active_insn_p (insn
))
2282 insn
= next_active_insn (insn
);
2283 for (; insn
; insn
= next_insn
)
2285 next_insn
= next_active_insn (insn
);
2286 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
2287 || INSN_CODE (insn
) == CODE_FOR_hbr
)
2291 int a0
= INSN_ADDRESSES (INSN_UID (hbr_insn
));
2292 int a1
= INSN_ADDRESSES (INSN_UID (insn
));
2293 if ((a1
- a0
== 8 && GET_MODE (insn
) != TImode
)
2296 prev_insn
= emit_insn_before (gen_lnop (), insn
);
2297 PUT_MODE (prev_insn
, GET_MODE (insn
));
2298 PUT_MODE (insn
, TImode
);
2304 if (INSN_CODE (insn
) == CODE_FOR_blockage
)
2306 if (GET_MODE (insn
) == TImode
)
2307 PUT_MODE (next_insn
, TImode
);
2309 next_insn
= next_active_insn (insn
);
2311 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2312 if ((CALL_P (insn
) || JUMP_P (insn
)) && SCHED_ON_EVEN_P (insn
))
2314 if (((addr
+ length
) & 7) != 0)
2316 emit_nop_for_insn (prev_insn
);
2320 else if (GET_MODE (insn
) == TImode
2321 && ((next_insn
&& GET_MODE (next_insn
) != TImode
)
2322 || get_attr_type (insn
) == TYPE_MULTI0
)
2323 && ((addr
+ length
) & 7) != 0)
2325 /* prev_insn will always be set because the first insn is
2326 always 8-byte aligned. */
2327 emit_nop_for_insn (prev_insn
);
2335 /* Routines for branch hints. */
/* Emit an hbr hint for BRANCH with destination TARGET, placed in front of
   BEFORE (or after it when BEFORE is a basic block note).  A guard tests
   for any of the three being null.  Unless BRANCH is an expect_then or
   expect_else insn, a preserved label is emitted before BRANCH, the
   branch's block index is set in BLOCKS, and the hbr insn refers to that
   label.  BRANCH and its label target(s), including every entry of a
   jump table, are marked HINTED_P.  DISTANCE then selects the follow-up:
   at 588 or more a blockage is emitted before the hint; at 8 * 4 or less
   nops are emitted after the hint, fenced by blockage insns.  */
2338 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
2339 int distance
, sbitmap blocks
)
2341 rtx branch_label
= 0;
2346 if (before
== 0 || branch
== 0 || target
== 0)
2349 /* While scheduling we require hints to be no further than 600, so
2350 we need to enforce that here too */
2354 /* If we have a Basic block note, emit it after the basic block note. */
2355 if (NOTE_INSN_BASIC_BLOCK_P (before
))
2356 before
= NEXT_INSN (before
);
2358 if (INSN_CODE (branch
) == CODE_FOR_expect_then
2359 || INSN_CODE (branch
) == CODE_FOR_expect_else
)
2361 HINTED_P (branch
) = 1;
2362 hint
= PREV_INSN (before
);
2366 branch_label
= gen_label_rtx ();
2367 LABEL_NUSES (branch_label
)++;
2368 LABEL_PRESERVE_P (branch_label
) = 1;
2369 insn
= emit_label_before (branch_label
, branch
);
2370 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2371 SET_BIT (blocks
, BLOCK_FOR_INSN (branch
)->index
);
2373 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2374 recog_memoized (hint
);
2375 HINTED_P (branch
) = 1;
2378 if (GET_CODE (target
) == LABEL_REF
)
2379 HINTED_P (XEXP (target
, 0)) = 1;
2380 else if (tablejump_p (branch
, 0, &table
))
2384 if (GET_CODE (PATTERN (table
)) == ADDR_VEC
)
2385 vec
= XVEC (PATTERN (table
), 0);
2387 vec
= XVEC (PATTERN (table
), 1);
2388 for (j
= GET_NUM_ELEM (vec
) - 1; j
>= 0; --j
)
2389 HINTED_P (XEXP (RTVEC_ELT (vec
, j
), 0)) = 1;
2392 if (distance
>= 588)
2394 /* Make sure the hint isn't scheduled any earlier than this point,
2395 which could make it too far for the branch offset to fit */
2396 recog_memoized (emit_insn_before (gen_blockage (), hint
));
2398 else if (distance
<= 8 * 4)
2400 /* To guarantee at least 8 insns between the hint and branch we
2403 for (d
= distance
; d
< 8 * 4; d
+= 4)
2406 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode
, 127)), hint
);
2407 recog_memoized (insn
);
2410 /* Make sure any nops inserted aren't scheduled before the hint. */
2411 recog_memoized (emit_insn_after (gen_blockage (), hint
));
2413 /* Make sure any nops inserted aren't scheduled after the call. */
2414 if (CALL_P (branch
) && distance
< 8 * 4)
2415 recog_memoized (emit_insn_before (gen_blockage (), branch
));
/* Visible cases.  For a JUMP_INSN: a RETURN pattern yields the link
   register; ADDR_VEC and ADDR_DIFF_VEC patterns and sets whose
   destination is not the PC are tested separately; for an IF_THEN_ELSE
   source, expect_then selects arm 1 and expect_else arm 2, otherwise the
   REG_BR_PROB note selects arm 1 above 60% and arm 2 below 40% of
   REG_BR_PROB_BASE, provided that arm is not the PC.  A chosen RETURN
   also yields the link register.  For a CALL_INSN: the pattern must be a
   PARALLEL whose first element is a CALL (possibly the source of a SET);
   the address inside the CALL's MEM is returned.
   NOTE(review): the return statements of the rejecting paths are on
   lines missing from this listing.  */
2419 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2420 the rtx for the branch target. */
2422 get_branch_target (rtx branch
)
2424 if (GET_CODE (branch
) == JUMP_INSN
)
2428 /* Return statements */
2429 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2430 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2433 if (GET_CODE (PATTERN (branch
)) == ADDR_VEC
2434 || GET_CODE (PATTERN (branch
)) == ADDR_DIFF_VEC
)
2437 set
= single_set (branch
);
2438 src
= SET_SRC (set
);
2439 if (GET_CODE (SET_DEST (set
)) != PC
)
2442 if (GET_CODE (src
) == IF_THEN_ELSE
)
2445 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2447 if (INSN_CODE (branch
) == CODE_FOR_expect_then
)
2448 return XEXP (src
, 1);
2449 if (INSN_CODE (branch
) == CODE_FOR_expect_else
)
2450 return XEXP (src
, 2);
2454 /* If the more probable case is not a fall through, then
2455 try a branch hint. */
2456 HOST_WIDE_INT prob
= INTVAL (XEXP (note
, 0));
2457 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2458 && GET_CODE (XEXP (src
, 1)) != PC
)
2459 lab
= XEXP (src
, 1);
2460 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2461 && GET_CODE (XEXP (src
, 2)) != PC
)
2462 lab
= XEXP (src
, 2);
2466 if (GET_CODE (lab
) == RETURN
)
2467 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2475 else if (GET_CODE (branch
) == CALL_INSN
)
2478 /* All of our call patterns are in a PARALLEL and the CALL is
2479 the first pattern in the PARALLEL. */
2480 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2482 call
= XVECEXP (PATTERN (branch
), 0, 0);
2483 if (GET_CODE (call
) == SET
)
2484 call
= SET_SRC (call
);
2485 if (GET_CODE (call
) != CALL
)
2487 return XEXP (XEXP (call
, 0), 0);
/* Visible checks on INSN: whether it is a non-jump insn matching the hbr
   pattern, and whether its PARALLEL pattern contains a CLOBBER of the
   register numbered HBR_REGNUM (elements are scanned from last to first).
   NOTE(review): the values returned for each check are on lines missing
   from this listing.  */
2492 /* The special $hbr register is used to prevent the insn scheduler from
2493 moving hbr insns across instructions which invalidate them. It
2494 should only be used in a clobber, and this function searches for
2495 insns which clobber it. */
2497 insn_clobbers_hbr (rtx insn
)
2499 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2502 && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2504 rtx parallel
= PATTERN (insn
);
2507 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2509 clobber
= XVECEXP (parallel
, 0, j
);
2510 if (GET_CODE (clobber
) == CLOBBER
2511 && GET_CODE (XEXP (clobber
, 0)) == REG
2512 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
/* Addresses in this function are byte offsets relative to the first insn
   examined (INSN_ADDRESSES of the insn minus FIRST_ADDR), so N
   instructions correspond to N * 4.  BEFORE_4 and BEFORE_16 are the
   insertion points for the two hbrp insns; HBRP_ADDR0 and HBRP_ADDR1 are
   compared against the addresses of iprefetch insns already present;
   bits 1 and 2 of INSERT_LNOP_AFTER request an lnop next to the first
   and second hbrp respectively.  */
2519 /* Search up to 32 insns starting at FIRST:
2520 - at any kind of hinted branch, just return
2521 - at any unconditional branch in the first 15 insns, just return
2522 - at a call or indirect branch, after the first 15 insns, force it to
2523 an even address and return
2524 - at any unconditional branch, after the first 15 insns, force it to
2526 At the end of the search, insert an hbrp within 4 insns of FIRST,
2527 and an hbrp within 16 instructions of FIRST.
2530 insert_hbrp_for_ilb_runout (rtx first
)
2532 rtx insn
, before_4
= 0, before_16
= 0;
2533 int addr
= 0, length
, first_addr
= -1;
2534 int hbrp_addr0
= 128 * 4, hbrp_addr1
= 128 * 4;
2535 int insert_lnop_after
= 0;
2536 for (insn
= first
; insn
; insn
= NEXT_INSN (insn
))
2539 if (first_addr
== -1)
2540 first_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2541 addr
= INSN_ADDRESSES (INSN_UID (insn
)) - first_addr
;
2542 length
= get_attr_length (insn
);
2544 if (before_4
== 0 && addr
+ length
>= 4 * 4)
2546 /* We test for 14 instructions because the first hbrp will add
2547 up to 2 instructions. */
2548 if (before_16
== 0 && addr
+ length
>= 14 * 4)
2551 if (INSN_CODE (insn
) == CODE_FOR_hbr
)
2553 /* Make sure an hbrp is at least 2 cycles away from a hint.
2554 Insert an lnop after the hbrp when necessary. */
2555 if (before_4
== 0 && addr
> 0)
2558 insert_lnop_after
|= 1;
2560 else if (before_4
&& addr
<= 4 * 4)
2561 insert_lnop_after
|= 1;
2562 if (before_16
== 0 && addr
> 10 * 4)
2565 insert_lnop_after
|= 2;
2567 else if (before_16
&& addr
<= 14 * 4)
2568 insert_lnop_after
|= 2;
2571 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
)
2573 if (addr
< hbrp_addr0
)
2575 else if (addr
< hbrp_addr1
)
2579 if (CALL_P (insn
) || JUMP_P (insn
))
2581 if (HINTED_P (insn
))
2584 /* Any branch after the first 15 insns should be on an even
2585 address to avoid a special case branch. There might be
2586 some nops and/or hbrps inserted, so we test after 10
2589 SCHED_ON_EVEN_P (insn
) = 1;
2592 if (CALL_P (insn
) || tablejump_p (insn
, 0, 0))
2596 if (addr
+ length
>= 32 * 4)
2598 gcc_assert (before_4
&& before_16
);
2599 if (hbrp_addr0
> 4 * 4)
2602 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4
);
2603 recog_memoized (insn
);
2604 INSN_ADDRESSES_NEW (insn
,
2605 INSN_ADDRESSES (INSN_UID (before_4
)));
2606 PUT_MODE (insn
, GET_MODE (before_4
));
2607 PUT_MODE (before_4
, TImode
);
2608 if (insert_lnop_after
& 1)
2610 insn
= emit_insn_before (gen_lnop (), before_4
);
2611 recog_memoized (insn
);
2612 INSN_ADDRESSES_NEW (insn
,
2613 INSN_ADDRESSES (INSN_UID (before_4
)));
2614 PUT_MODE (insn
, TImode
);
2617 if ((hbrp_addr0
<= 4 * 4 || hbrp_addr0
> 16 * 4)
2618 && hbrp_addr1
> 16 * 4)
2621 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16
);
2622 recog_memoized (insn
);
2623 INSN_ADDRESSES_NEW (insn
,
2624 INSN_ADDRESSES (INSN_UID (before_16
)));
2625 PUT_MODE (insn
, GET_MODE (before_16
));
2626 PUT_MODE (before_16
, TImode
);
2627 if (insert_lnop_after
& 2)
2629 insn
= emit_insn_before (gen_lnop (), before_16
);
2630 recog_memoized (insn
);
2631 INSN_ADDRESSES_NEW (insn
,
2632 INSN_ADDRESSES (INSN_UID
2634 PUT_MODE (insn
, TImode
);
2640 else if (BARRIER_P (insn
))
/* NOTE(review): the function header line is missing from this listing, so
   the function's name and signature are not visible here.
   Visible logic: only when TARGET_SAFE_HINTS is set, run shorten_branches,
   call insert_hbrp_for_ilb_runout on the first active insn of the
   function, and then on the active insn following every hinted label and
   every call.  */
2645 /* The SPU might hang when it executes 48 inline instructions after a
2646 hinted branch jumps to its hinted target. The beginning of a
2647 function and the return from a call might have been hinted, and must
2648 be handled as well. To prevent a hang we insert 2 hbrps. The first
2649 should be within 6 insns of the branch target. The second should be
2650 within 22 insns of the branch target. When determining if hbrps are
2651 necessary, we look for only 32 inline instructions, because up to
2652 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2653 new hbrps, we insert them within 4 and 16 insns of the target. */
2658 if (TARGET_SAFE_HINTS
)
2660 shorten_branches (get_insns ());
2661 /* Insert hbrp at beginning of function */
2662 insn
= next_active_insn (get_insns ());
2664 insert_hbrp_for_ilb_runout (insn
);
2665 /* Insert hbrp after hinted targets. */
2666 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2667 if ((LABEL_P (insn
) && HINTED_P (insn
)) || CALL_P (insn
))
2668 insert_hbrp_for_ilb_runout (next_active_insn (insn
));
2672 static int in_spu_reorg
;
2674 /* Insert branch hints. There are no branch optimizations after this
2675 pass, so it's safe to set our branch hints now. */
2677 spu_machine_dependent_reorg (void)
2682 rtx branch_target
= 0;
2683 int branch_addr
= 0, insn_addr
, required_dist
= 0;
2687 if (!TARGET_BRANCH_HINTS
|| optimize
== 0)
2689 /* We still do it for unoptimized code because an external
2690 function might have hinted a call or return. */
2696 blocks
= sbitmap_alloc (last_basic_block
);
2697 sbitmap_zero (blocks
);
2700 compute_bb_for_insn ();
2705 (struct spu_bb_info
*) xcalloc (n_basic_blocks
,
2706 sizeof (struct spu_bb_info
));
2708 /* We need exact insn addresses and lengths. */
2709 shorten_branches (get_insns ());
2711 for (i
= n_basic_blocks
- 1; i
>= 0; i
--)
2713 bb
= BASIC_BLOCK (i
);
2715 if (spu_bb_info
[i
].prop_jump
)
2717 branch
= spu_bb_info
[i
].prop_jump
;
2718 branch_target
= get_branch_target (branch
);
2719 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2720 required_dist
= spu_hint_dist
;
2722 /* Search from end of a block to beginning. In this loop, find
2723 jumps which need a branch and emit them only when:
2724 - it's an indirect branch and we're at the insn which sets
2726 - we're at an insn that will invalidate the hint. e.g., a
2727 call, another hint insn, inline asm that clobbers $hbr, and
2728 some inlined operations (divmodsi4). Don't consider jumps
2729 because they are only at the end of a block and are
2730 considered when we are deciding whether to propagate
2731 - we're getting too far away from the branch. The hbr insns
2732 only have a signed 10 bit offset
2733 We go back as far as possible so the branch will be considered
2734 for propagation when we get to the beginning of the block. */
2735 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2739 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2741 && ((GET_CODE (branch_target
) == REG
2742 && set_of (branch_target
, insn
) != NULL_RTX
)
2743 || insn_clobbers_hbr (insn
)
2744 || branch_addr
- insn_addr
> 600))
2746 rtx next
= NEXT_INSN (insn
);
2747 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2748 if (insn
!= BB_END (bb
)
2749 && branch_addr
- next_addr
>= required_dist
)
2753 "hint for %i in block %i before %i\n",
2754 INSN_UID (branch
), bb
->index
,
2756 spu_emit_branch_hint (next
, branch
, branch_target
,
2757 branch_addr
- next_addr
, blocks
);
2762 /* JUMP_P will only be true at the end of a block. When
2763 branch is already set it means we've previously decided
2764 to propagate a hint for that branch into this block. */
2765 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2768 if ((branch_target
= get_branch_target (insn
)))
2771 branch_addr
= insn_addr
;
2772 required_dist
= spu_hint_dist
;
2773 if (INSN_CODE (branch
) == CODE_FOR_expect_then
2774 || INSN_CODE (branch
) == CODE_FOR_expect_else
)
2779 if (insn
== BB_HEAD (bb
))
2785 /* If we haven't emitted a hint for this branch yet, it might
2786 be profitable to emit it in one of the predecessor blocks,
2787 especially for loops. */
2789 basic_block prev
= 0, prop
= 0, prev2
= 0;
2790 int loop_exit
= 0, simple_loop
= 0;
2791 int next_addr
= INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn
)));
2793 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2794 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2795 prev
= EDGE_PRED (bb
, j
)->src
;
2797 prev2
= EDGE_PRED (bb
, j
)->src
;
2799 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2800 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2802 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2805 /* If this branch is a loop exit then propagate to previous
2806 fallthru block. This catches the cases when it is a simple
2807 loop or when there is an initial branch into the loop. */
2808 if (prev
&& (loop_exit
|| simple_loop
)
2809 && prev
->loop_depth
<= bb
->loop_depth
)
2812 /* If there is only one adjacent predecessor. Don't propagate
2813 outside this loop. This loop_depth test isn't perfect, but
2814 I'm not sure the loop_father member is valid at this point. */
2815 else if (prev
&& single_pred_p (bb
)
2816 && prev
->loop_depth
== bb
->loop_depth
)
2819 /* If this is the JOIN block of a simple IF-THEN then
2820 propagate the hint to the HEADER block. */
2821 else if (prev
&& prev2
2822 && EDGE_COUNT (bb
->preds
) == 2
2823 && EDGE_COUNT (prev
->preds
) == 1
2824 && EDGE_PRED (prev
, 0)->src
== prev2
2825 && prev2
->loop_depth
== bb
->loop_depth
2826 && GET_CODE (branch_target
) != REG
)
2829 /* Don't propagate when:
2830 - this is a simple loop and the hint would be too far
2831 - this is not a simple loop and there are 16 insns in
2833 - the predecessor block ends in a branch that will be
2835 - the predecessor block ends in an insn that invalidates
2839 && (bbend
= BB_END (prop
))
2840 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2841 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2842 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2845 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2846 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2847 bb
->index
, prop
->index
, bb
->loop_depth
,
2848 INSN_UID (branch
), loop_exit
, simple_loop
,
2849 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2851 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2852 spu_bb_info
[prop
->index
].bb_index
= i
;
2854 else if (branch_addr
- next_addr
>= required_dist
)
2857 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2858 INSN_UID (branch
), bb
->index
,
2859 INSN_UID (NEXT_INSN (insn
)));
2860 spu_emit_branch_hint (NEXT_INSN (insn
), branch
, branch_target
,
2861 branch_addr
- next_addr
, blocks
);
2868 if (!sbitmap_empty_p (blocks
))
2869 find_many_sub_basic_blocks (blocks
);
2871 /* We have to schedule to make sure alignment is ok. */
2872 FOR_EACH_BB (bb
) bb
->flags
&= ~BB_DISABLE_SCHEDULE
;
2874 /* The hints need to be scheduled, so call it again. */
2881 for (insn
= get_insns (); insn
; insn
= NEXT_INSN (insn
))
2882 if (NONJUMP_INSN_P (insn
) && INSN_CODE (insn
) == CODE_FOR_hbr
)
2884 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2885 between its branch label and the branch. We don't move the
2886 label because GCC expects it at the beginning of the block. */
2887 rtx unspec
= SET_SRC (XVECEXP (PATTERN (insn
), 0, 0));
2888 rtx label_ref
= XVECEXP (unspec
, 0, 0);
2889 rtx label
= XEXP (label_ref
, 0);
2892 for (branch
= NEXT_INSN (label
);
2893 !JUMP_P (branch
) && !CALL_P (branch
);
2894 branch
= NEXT_INSN (branch
))
2895 if (NONJUMP_INSN_P (branch
))
2896 offset
+= get_attr_length (branch
);
2898 XVECEXP (unspec
, 0, 0) = plus_constant (label_ref
, offset
);
2900 else if (JUMP_P (insn
) && (INSN_CODE (insn
) == CODE_FOR_expect_then
2901 || INSN_CODE (insn
) == CODE_FOR_expect_else
))
2903 /* __builtin_expect with a non-constant second argument
2904 generates patterns which contain labels that need to be
2905 relocated. These are generated in spu_emit_branch_or_set. */
2906 rtx set0
= XVECEXP (PATTERN (insn
), 0, 0);
2907 rtx use1
= XVECEXP (PATTERN (insn
), 0, 1);
2908 rtx use2
= XVECEXP (PATTERN (insn
), 0, 2);
2909 rtx use3
= XVECEXP (PATTERN (insn
), 0, 3);
2910 rtx label0
= XEXP (XEXP (set0
, 1), 1);
2911 rtx label1
= XEXP (XEXP (use1
, 0), 0);
2912 rtx label2
= XEXP (XEXP (use2
, 0), 0);
2913 rtx label3
= XEXP (XEXP (use3
, 0), 0);
2914 if (GET_CODE (label0
) == PC
)
2915 label0
= XEXP (XEXP (set0
, 1), 2);
2916 remove_insn (label1
);
2917 add_insn_before (label1
, insn
, 0);
2918 if (GET_CODE (XEXP (XEXP (set0
, 1), 0)) == NE
)
2920 remove_insn (label2
);
2921 add_insn_after (label2
, insn
, 0);
2922 remove_insn (label3
);
2923 add_insn_after (label3
, XEXP (label0
, 0), 0);
2927 remove_insn (label2
);
2928 add_insn_after (label2
, XEXP (label0
, 0), 0);
2929 remove_insn (label3
);
2930 add_insn_after (label3
, insn
, 0);
2934 if (spu_flag_var_tracking
)
2937 timevar_push (TV_VAR_TRACKING
);
2938 variable_tracking_main ();
2939 timevar_pop (TV_VAR_TRACKING
);
2940 df_finish_pass (false);
2943 free_bb_for_insn ();
2949 /* Insn scheduling routines, primarily for dual issue. */
2951 spu_sched_issue_rate (void)
2957 uses_ls_unit(rtx insn
)
2959 rtx set
= single_set (insn
);
2961 && (GET_CODE (SET_DEST (set
)) == MEM
2962 || GET_CODE (SET_SRC (set
)) == MEM
))
2971 /* Handle inline asm */
2972 if (INSN_CODE (insn
) == -1)
2974 t
= get_attr_type (insn
);
2999 case TYPE_IPREFETCH
:
3007 /* haifa-sched.c has a static variable that keeps track of the current
3008 cycle. It is passed to spu_sched_reorder, and we record it here for
3009 use by spu_sched_variable_issue. It won't be accurate if the
3010 scheduler updates its clock_var between the two calls. */
3011 static int clock_var
;
3013 /* This is used to keep track of insn alignment. Set to 0 at the
3014 beginning of each block and increased by the "length" attr of each
3016 static int spu_sched_length
;
3018 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
3019 ready list appropriately in spu_sched_reorder(). */
3020 static int pipe0_clock
;
3021 static int pipe1_clock
;
3023 static int prev_clock_var
;
3025 static int prev_priority
;
3027 /* The SPU needs to load the next ilb sometime during the execution of
3028 the previous ilb. There is a potential conflict if every cycle has a
3029 load or store. To avoid the conflict we make sure the load/store
3030 unit is free for at least one cycle during the execution of insns in
3031 the previous ilb. */
3032 static int spu_ls_first
;
3033 static int prev_ls_clock
;
3036 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
3037 int max_ready ATTRIBUTE_UNUSED
)
3039 spu_sched_length
= 0;
3043 spu_sched_init (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
3044 int max_ready ATTRIBUTE_UNUSED
)
3046 if (align_labels
> 4 || align_loops
> 4 || align_jumps
> 4)
3048 /* When any block might be at least 8-byte aligned, assume they
3049 will all be at least 8-byte aligned to make sure dual issue
3050 works out correctly. */
3051 spu_sched_length
= 0;
3053 spu_ls_first
= INT_MAX
;
3058 prev_clock_var
= -1;
3063 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED
,
3064 int verbose ATTRIBUTE_UNUSED
, rtx insn
, int more
)
3068 if (GET_CODE (PATTERN (insn
)) == USE
3069 || GET_CODE (PATTERN (insn
)) == CLOBBER
3070 || (len
= get_attr_length (insn
)) == 0)
3073 spu_sched_length
+= len
;
3075 /* Reset on inline asm */
3076 if (INSN_CODE (insn
) == -1)
3078 spu_ls_first
= INT_MAX
;
3083 p
= get_pipe (insn
);
3085 pipe0_clock
= clock_var
;
3087 pipe1_clock
= clock_var
;
3091 if (clock_var
- prev_ls_clock
> 1
3092 || INSN_CODE (insn
) == CODE_FOR_iprefetch
)
3093 spu_ls_first
= INT_MAX
;
3094 if (uses_ls_unit (insn
))
3096 if (spu_ls_first
== INT_MAX
)
3097 spu_ls_first
= spu_sched_length
;
3098 prev_ls_clock
= clock_var
;
3101 /* The scheduler hasn't inserted the nop, but we will later on.
3102 Include those nops in spu_sched_length. */
3103 if (prev_clock_var
== clock_var
&& (spu_sched_length
& 7))
3104 spu_sched_length
+= 4;
3105 prev_clock_var
= clock_var
;
3107 /* more is -1 when called from spu_sched_reorder for new insns
3108 that don't have INSN_PRIORITY */
3110 prev_priority
= INSN_PRIORITY (insn
);
3113 /* Always try issuing more insns. spu_sched_reorder will decide
3114 when the cycle should be advanced. */
3118 /* This function is called for both TARGET_SCHED_REORDER and
3119 TARGET_SCHED_REORDER2. */
3121 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED
, int verbose ATTRIBUTE_UNUSED
,
3122 rtx
*ready
, int *nreadyp
, int clock
)
3124 int i
, nready
= *nreadyp
;
3125 int pipe_0
, pipe_1
, pipe_hbrp
, pipe_ls
, schedule_i
;
3130 if (nready
<= 0 || pipe1_clock
>= clock
)
3133 /* Find any rtl insns that don't generate assembly insns and schedule
3135 for (i
= nready
- 1; i
>= 0; i
--)
3138 if (INSN_CODE (insn
) == -1
3139 || INSN_CODE (insn
) == CODE_FOR_blockage
3140 || INSN_CODE (insn
) == CODE_FOR__spu_convert
)
3142 ready
[i
] = ready
[nready
- 1];
3143 ready
[nready
- 1] = insn
;
3148 pipe_0
= pipe_1
= pipe_hbrp
= pipe_ls
= schedule_i
= -1;
3149 for (i
= 0; i
< nready
; i
++)
3150 if (INSN_CODE (ready
[i
]) != -1)
3153 switch (get_attr_type (insn
))
3178 case TYPE_IPREFETCH
:
3184 /* In the first scheduling phase, schedule loads and stores together
3185 to increase the chance they will get merged during postreload CSE. */
3186 if (!reload_completed
&& pipe_ls
>= 0)
3188 insn
= ready
[pipe_ls
];
3189 ready
[pipe_ls
] = ready
[nready
- 1];
3190 ready
[nready
- 1] = insn
;
3194 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3198 /* When we have loads/stores in every cycle of the last 15 insns and
3199 we are about to schedule another load/store, emit an hbrp insn
3202 && spu_sched_length
- spu_ls_first
>= 4 * 15
3203 && !(pipe0_clock
< clock
&& pipe_0
>= 0) && pipe_1
== pipe_ls
)
3205 insn
= sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3206 recog_memoized (insn
);
3207 if (pipe0_clock
< clock
)
3208 PUT_MODE (insn
, TImode
);
3209 spu_sched_variable_issue (file
, verbose
, insn
, -1);
3213 /* In general, we want to emit nops to increase dual issue, but dual
3214 issue isn't faster when one of the insns could be scheduled later
3215 without affecting the critical path. We look at INSN_PRIORITY to
3216 make a good guess, but it isn't perfect so -mdual-nops=n can be
3217 used to affect it. */
3218 if (in_spu_reorg
&& spu_dual_nops
< 10)
3220 /* When we are at an even address and we are not issuing nops to
3221 improve scheduling then we need to advance the cycle. */
3222 if ((spu_sched_length
& 7) == 0 && prev_clock_var
== clock
3223 && (spu_dual_nops
== 0
3226 INSN_PRIORITY (ready
[pipe_1
]) + spu_dual_nops
)))
3229 /* When at an odd address, schedule the highest priority insn
3230 without considering pipeline. */
3231 if ((spu_sched_length
& 7) == 4 && prev_clock_var
!= clock
3232 && (spu_dual_nops
== 0
3234 INSN_PRIORITY (ready
[nready
- 1]) + spu_dual_nops
)))
3239 /* We haven't issued a pipe0 insn yet this cycle, if there is a
3240 pipe0 insn in the ready list, schedule it. */
3241 if (pipe0_clock
< clock
&& pipe_0
>= 0)
3242 schedule_i
= pipe_0
;
3244 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3245 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3247 schedule_i
= pipe_1
;
3249 if (schedule_i
> -1)
3251 insn
= ready
[schedule_i
];
3252 ready
[schedule_i
] = ready
[nready
- 1];
3253 ready
[nready
- 1] = insn
;
3259 /* INSN is dependent on DEP_INSN. */
3261 spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
)
3265 /* The blockage pattern is used to prevent instructions from being
3266 moved across it and has no cost. */
3267 if (INSN_CODE (insn
) == CODE_FOR_blockage
3268 || INSN_CODE (dep_insn
) == CODE_FOR_blockage
)
3271 if (INSN_CODE (insn
) == CODE_FOR__spu_convert
3272 || INSN_CODE (dep_insn
) == CODE_FOR__spu_convert
)
3275 /* Make sure hbrps are spread out. */
3276 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3277 && INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3280 /* Make sure hints and hbrps are 2 cycles apart. */
3281 if ((INSN_CODE (insn
) == CODE_FOR_iprefetch
3282 || INSN_CODE (insn
) == CODE_FOR_hbr
)
3283 && (INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
3284 || INSN_CODE (dep_insn
) == CODE_FOR_hbr
))
3287 /* An hbrp has no real dependency on other insns. */
3288 if (INSN_CODE (insn
) == CODE_FOR_iprefetch
3289 || INSN_CODE (dep_insn
) == CODE_FOR_iprefetch
)
3292 /* Assuming that it is unlikely an argument register will be used in
3293 the first cycle of the called function, we reduce the cost for
3294 slightly better scheduling of dep_insn. When not hinted, the
3295 mispredicted branch would hide the cost as well. */
3298 rtx target
= get_branch_target (insn
);
3299 if (GET_CODE (target
) != REG
|| !set_of (target
, insn
))
3304 /* And when returning from a function, let's assume the return values
3305 are completed sooner too. */
3306 if (CALL_P (dep_insn
))
3309 /* Make sure an instruction that loads from the back chain is scheduled
3310 away from the return instruction so a hint is more likely to get
3312 if (INSN_CODE (insn
) == CODE_FOR__return
3313 && (set
= single_set (dep_insn
))
3314 && GET_CODE (SET_DEST (set
)) == REG
3315 && REGNO (SET_DEST (set
)) == LINK_REGISTER_REGNUM
)
3318 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3319 scheduler makes every insn in a block anti-dependent on the final
3320 jump_insn. We adjust here so higher cost insns will get scheduled
3322 if (JUMP_P (insn
) && REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
3323 return insn_cost (dep_insn
) - 3;
3328 /* Create a CONST_DOUBLE from a string. */
3330 spu_float_const (const char *string
, enum machine_mode mode
)
3332 REAL_VALUE_TYPE value
;
3333 value
= REAL_VALUE_ATOF (string
, mode
);
3334 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
3338 spu_constant_address_p (rtx x
)
3340 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
3341 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
3342 || GET_CODE (x
) == HIGH
);
3345 static enum spu_immediate
3346 which_immediate_load (HOST_WIDE_INT val
)
3348 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3350 if (val
>= -0x8000 && val
<= 0x7fff)
3352 if (val
>= 0 && val
<= 0x3ffff)
3354 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3356 if ((val
& 0xffff) == 0)
3362 /* Return true when OP can be loaded by one of the il instructions, or
3363 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3365 immediate_load_p (rtx op
, enum machine_mode mode
)
3367 if (CONSTANT_P (op
))
3369 enum immediate_class c
= classify_immediate (op
, mode
);
3370 return c
== IC_IL1
|| c
== IC_IL1s
3371 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
3376 /* Return true if the first SIZE bytes of arr is a constant that can be
3377 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3378 represent the size and offset of the instruction to use. */
3380 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
3382 int cpat
, run
, i
, start
;
3386 for (i
= 0; i
< size
&& cpat
; i
++)
3394 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
3396 else if (arr
[i
] == 0)
3398 while (arr
[i
+run
] == run
&& i
+run
< 16)
3400 if (run
!= 4 && run
!= 8)
3405 if ((i
& (run
-1)) != 0)
3412 if (cpat
&& (run
|| size
< 16))
3419 *pstart
= start
== -1 ? 16-run
: start
;
3425 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3426 it into a register. MODE is only valid when OP is a CONST_INT. */
3427 static enum immediate_class
3428 classify_immediate (rtx op
, enum machine_mode mode
)
3431 unsigned char arr
[16];
3432 int i
, j
, repeated
, fsmbi
, repeat
;
3434 gcc_assert (CONSTANT_P (op
));
3436 if (GET_MODE (op
) != VOIDmode
)
3437 mode
= GET_MODE (op
);
3439 /* A V4SI const_vector with all identical symbols is ok. */
3442 && GET_CODE (op
) == CONST_VECTOR
3443 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
3444 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
3445 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
3446 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
3447 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
3448 op
= CONST_VECTOR_ELT (op
, 0);
3450 switch (GET_CODE (op
))
3454 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
3457 /* We can never know if the resulting address fits in 18 bits and can be
3458 loaded with ila. For now, assume the address will not overflow if
3459 the displacement is "small" (fits 'K' constraint). */
3460 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (op
, 0)) == PLUS
)
3462 rtx sym
= XEXP (XEXP (op
, 0), 0);
3463 rtx cst
= XEXP (XEXP (op
, 0), 1);
3465 if (GET_CODE (sym
) == SYMBOL_REF
3466 && GET_CODE (cst
) == CONST_INT
3467 && satisfies_constraint_K (cst
))
3476 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
3477 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
3478 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
3484 constant_to_array (mode
, op
, arr
);
3486 /* Check that each 4-byte slot is identical. */
3488 for (i
= 4; i
< 16; i
+= 4)
3489 for (j
= 0; j
< 4; j
++)
3490 if (arr
[j
] != arr
[i
+ j
])
3495 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3496 val
= trunc_int_for_mode (val
, SImode
);
3498 if (which_immediate_load (val
) != SPU_NONE
)
3502 /* Any mode of 2 bytes or smaller can be loaded with an il
3504 gcc_assert (GET_MODE_SIZE (mode
) > 2);
3508 for (i
= 0; i
< 16 && fsmbi
; i
++)
3509 if (arr
[i
] != 0 && repeat
== 0)
3511 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
3514 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
3516 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
3529 static enum spu_immediate
3530 which_logical_immediate (HOST_WIDE_INT val
)
3532 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
3534 if (val
>= -0x200 && val
<= 0x1ff)
3536 if (val
>= 0 && val
<= 0xffff)
3538 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
3540 val
= trunc_int_for_mode (val
, HImode
);
3541 if (val
>= -0x200 && val
<= 0x1ff)
3543 if ((val
& 0xff) == ((val
>> 8) & 0xff))
3545 val
= trunc_int_for_mode (val
, QImode
);
3546 if (val
>= -0x200 && val
<= 0x1ff)
3553 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3556 const_vector_immediate_p (rtx x
)
3559 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
3560 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
3561 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
3562 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
3568 logical_immediate_p (rtx op
, enum machine_mode mode
)
3571 unsigned char arr
[16];
3574 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3575 || GET_CODE (op
) == CONST_VECTOR
);
3577 if (GET_CODE (op
) == CONST_VECTOR
3578 && !const_vector_immediate_p (op
))
3581 if (GET_MODE (op
) != VOIDmode
)
3582 mode
= GET_MODE (op
);
3584 constant_to_array (mode
, op
, arr
);
3586 /* Check that bytes are repeated. */
3587 for (i
= 4; i
< 16; i
+= 4)
3588 for (j
= 0; j
< 4; j
++)
3589 if (arr
[j
] != arr
[i
+ j
])
3592 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3593 val
= trunc_int_for_mode (val
, SImode
);
3595 i
= which_logical_immediate (val
);
3596 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
3600 iohl_immediate_p (rtx op
, enum machine_mode mode
)
3603 unsigned char arr
[16];
3606 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3607 || GET_CODE (op
) == CONST_VECTOR
);
3609 if (GET_CODE (op
) == CONST_VECTOR
3610 && !const_vector_immediate_p (op
))
3613 if (GET_MODE (op
) != VOIDmode
)
3614 mode
= GET_MODE (op
);
3616 constant_to_array (mode
, op
, arr
);
3618 /* Check that bytes are repeated. */
3619 for (i
= 4; i
< 16; i
+= 4)
3620 for (j
= 0; j
< 4; j
++)
3621 if (arr
[j
] != arr
[i
+ j
])
3624 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3625 val
= trunc_int_for_mode (val
, SImode
);
3627 return val
>= 0 && val
<= 0xffff;
3631 arith_immediate_p (rtx op
, enum machine_mode mode
,
3632 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
3635 unsigned char arr
[16];
3638 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3639 || GET_CODE (op
) == CONST_VECTOR
);
3641 if (GET_CODE (op
) == CONST_VECTOR
3642 && !const_vector_immediate_p (op
))
3645 if (GET_MODE (op
) != VOIDmode
)
3646 mode
= GET_MODE (op
);
3648 constant_to_array (mode
, op
, arr
);
3650 if (VECTOR_MODE_P (mode
))
3651 mode
= GET_MODE_INNER (mode
);
3653 bytes
= GET_MODE_SIZE (mode
);
3654 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3656 /* Check that bytes are repeated. */
3657 for (i
= bytes
; i
< 16; i
+= bytes
)
3658 for (j
= 0; j
< bytes
; j
++)
3659 if (arr
[j
] != arr
[i
+ j
])
3663 for (j
= 1; j
< bytes
; j
++)
3664 val
= (val
<< 8) | arr
[j
];
3666 val
= trunc_int_for_mode (val
, mode
);
3668 return val
>= low
&& val
<= high
;
3671 /* TRUE when op is an immediate and an exact power of 2, and given that
3672 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3673 all entries must be the same. */
3675 exp2_immediate_p (rtx op
, enum machine_mode mode
, int low
, int high
)
3677 enum machine_mode int_mode
;
3679 unsigned char arr
[16];
3682 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
3683 || GET_CODE (op
) == CONST_VECTOR
);
3685 if (GET_CODE (op
) == CONST_VECTOR
3686 && !const_vector_immediate_p (op
))
3689 if (GET_MODE (op
) != VOIDmode
)
3690 mode
= GET_MODE (op
);
3692 constant_to_array (mode
, op
, arr
);
3694 if (VECTOR_MODE_P (mode
))
3695 mode
= GET_MODE_INNER (mode
);
3697 bytes
= GET_MODE_SIZE (mode
);
3698 int_mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
3700 /* Check that bytes are repeated. */
3701 for (i
= bytes
; i
< 16; i
+= bytes
)
3702 for (j
= 0; j
< bytes
; j
++)
3703 if (arr
[j
] != arr
[i
+ j
])
3707 for (j
= 1; j
< bytes
; j
++)
3708 val
= (val
<< 8) | arr
[j
];
3710 val
= trunc_int_for_mode (val
, int_mode
);
3712 /* Currently, we only handle SFmode */
3713 gcc_assert (mode
== SFmode
);
3716 int exp
= (val
>> 23) - 127;
3717 return val
> 0 && (val
& 0x007fffff) == 0
3718 && exp
>= low
&& exp
<= high
;
3723 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3726 ea_symbol_ref (rtx
*px
, void *data ATTRIBUTE_UNUSED
)
3731 return (GET_CODE (x
) == SYMBOL_REF
3732 && (decl
= SYMBOL_REF_DECL (x
)) != 0
3733 && TREE_CODE (decl
) == VAR_DECL
3734 && TYPE_ADDR_SPACE (strip_array_types (TREE_TYPE (decl
))));
3738 - any 32-bit constant (SImode, SFmode)
3739 - any constant that can be generated with fsmbi (any mode)
3740 - a 64-bit constant where the high and low bits are identical
3742 - a 128-bit constant where the four 32-bit words match. */
3744 spu_legitimate_constant_p (rtx x
)
3746 if (GET_CODE (x
) == HIGH
)
3749 /* Reject any __ea qualified reference. These can't appear in
3750 instructions but must be forced to the constant pool. */
3751 if (for_each_rtx (&x
, ea_symbol_ref
, 0))
3754 if (GET_CODE (x
) == CONST_VECTOR
)
3756 /* V4SI with all identical symbols is valid. */
3757 if (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
3758 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
3759 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
)
3761 && GET_MODE (x
) == V4SImode
3762 && CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
3763 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
3764 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3));
3766 if (!const_vector_immediate_p (x
))
3772 /* Valid addresses are:
3773 - symbol_ref, label_ref, const
3775 - reg + const, where const is 16 byte aligned
3776 - reg + reg, alignment doesn't matter
3777 The alignment matters in the reg+const case because lqd and stqd
3778 ignore the 4 least significant bits of the const.
3780 Addresses are handled in 4 phases.
3781 1) from the beginning of rtl expansion until the split0 pass. Any
3782 address is acceptable.
3783 2) The split0 pass. It is responsible for making every load and store
3784 valid. It calls legitimate_address with FOR_SPLIT set to 1. This
3785 is where non-16-byte aligned loads/stores are split into multiple
3786 instructions to extract or insert just the part we care about.
3787 3) From the split0 pass to the beginning of reload. During this
3788 phase the constant part of an address must be 16 byte aligned, and
3789 we don't allow any loads/store of less than 4 bytes. We also
3790 allow a mask of -16 to be part of the address as an optimization.
3791 4) From reload until the end. Reload can change the modes of loads
3792 and stores to something smaller than 4-bytes which we need to allow
3793 now, and it also adjusts the address to match. So in this phase we
3794 allow that special case. Still allow addresses with a mask of -16.
3796 FOR_SPLIT is only set to 1 for phase 2, otherwise it is 0. */
3798 spu_legitimate_address (enum machine_mode mode
, rtx x
, int reg_ok_strict
,
3801 int aligned
= (split0_completed
|| for_split
)
3802 && !reload_in_progress
&& !reload_completed
;
3803 int const_aligned
= split0_completed
|| for_split
;
3804 if (GET_MODE_SIZE (mode
) >= 16)
3806 else if (aligned
&& GET_MODE_SIZE (mode
) < 4)
3808 if (split0_completed
3809 && (GET_CODE (x
) == AND
3810 && GET_CODE (XEXP (x
, 1)) == CONST_INT
3811 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) - 16
3812 && !CONSTANT_P (XEXP (x
, 0))))
3814 switch (GET_CODE (x
))
3817 return !TARGET_LARGE_MEM
&& !aligned
;
3820 /* Keep __ea references until reload so that spu_expand_mov
3821 can see them in MEMs. */
3822 if (ea_symbol_ref (&x
, 0))
3823 return !reload_in_progress
&& !reload_completed
;
3824 return !TARGET_LARGE_MEM
&& (!aligned
|| ALIGNED_SYMBOL_REF_P (x
));
3827 if (!TARGET_LARGE_MEM
&& GET_CODE (XEXP (x
, 0)) == PLUS
)
3829 rtx sym
= XEXP (XEXP (x
, 0), 0);
3830 rtx cst
= XEXP (XEXP (x
, 0), 1);
3832 /* Accept any symbol_ref + constant, assuming it does not
3833 wrap around the local store addressability limit. */
3834 if (ea_symbol_ref (&sym
, 0))
3837 if (GET_CODE (sym
) == SYMBOL_REF
&& GET_CODE (cst
) == CONST_INT
)
3839 /* Check for alignment if required. */
3842 if ((INTVAL (cst
) & 15) == 0 && ALIGNED_SYMBOL_REF_P (sym
))
3849 /* We don't test alignment here. For an absolute address we
3850 assume the user knows what they are doing. */
3851 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
3855 if (GET_CODE (x
) != REG
)
3859 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
)
3860 && reg_aligned_for_addr (x
, 0);
3865 rtx op0
= XEXP (x
, 0);
3866 rtx op1
= XEXP (x
, 1);
3867 if (GET_CODE (op0
) == SUBREG
)
3868 op0
= XEXP (op0
, 0);
3869 if (GET_CODE (op1
) == SUBREG
)
3870 op1
= XEXP (op1
, 0);
3871 if (GET_CODE (op0
) == REG
3872 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3873 && GET_CODE (op1
) == CONST_INT
3874 && INTVAL (op1
) >= -0x2000
3875 && INTVAL (op1
) <= 0x1fff
3876 && reg_aligned_for_addr (op0
, 0)
3878 || (INTVAL (op1
) & 15) == 0
3879 || ((reload_in_progress
|| reload_completed
)
3880 && GET_MODE_SIZE (mode
) < 4
3881 && (INTVAL (op1
) & 15) == 4 - GET_MODE_SIZE (mode
))
3882 /* Some passes create a fake register for testing valid
3883 * addresses, be more lenient when we see those. ivopts
3884 * and reload do it. */
3885 || REGNO (op0
) == LAST_VIRTUAL_REGISTER
+ 1
3886 || REGNO (op0
) == LAST_VIRTUAL_REGISTER
+ 2))
3888 if (GET_CODE (op0
) == REG
3889 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
3890 && reg_aligned_for_addr (op0
, 0)
3891 && GET_CODE (op1
) == REG
3892 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
)
3893 && reg_aligned_for_addr (op1
, 0))
3904 /* When the address is reg + const_int, force the const_int into a
3907 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
3908 enum machine_mode mode
)
3911 /* Make sure both operands are registers. */
3912 if (GET_CODE (x
) == PLUS
)
3916 if (ALIGNED_SYMBOL_REF_P (op0
))
3918 op0
= force_reg (Pmode
, op0
);
3919 mark_reg_pointer (op0
, 128);
3921 else if (GET_CODE (op0
) != REG
)
3922 op0
= force_reg (Pmode
, op0
);
3923 if (ALIGNED_SYMBOL_REF_P (op1
))
3925 op1
= force_reg (Pmode
, op1
);
3926 mark_reg_pointer (op1
, 128);
3928 else if (GET_CODE (op1
) != REG
)
3929 op1
= force_reg (Pmode
, op1
);
3930 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
3931 if (spu_legitimate_address (mode
, x
, 0, 0))
3937 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3938 struct attribute_spec.handler.
   When *NODE is not a FUNCTION_DECL a warning that names the attribute
   (through IDENTIFIER_POINTER (name)) is issued and *NO_ADD_ATTRS is set
   to true.  ARGS and FLAGS are unused.
   NOTE(review): the declaration of the NAME parameter and the return
   statement are not visible in this listing; confirm against the full
   file. */
3940 spu_handle_fndecl_attribute (tree
* node
,
3942 tree args ATTRIBUTE_UNUSED
,
3943 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3945 if (TREE_CODE (*node
) != FUNCTION_DECL
)
3947 warning (0, "`%s' attribute only applies to functions",
3948 IDENTIFIER_POINTER (name
));
3949 *no_add_attrs
= true;
3955 /* Handle the "vector" attribute. */
3957 spu_handle_vector_attribute (tree
* node
, tree name
,
3958 tree args ATTRIBUTE_UNUSED
,
3959 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
3961 tree type
= *node
, result
= NULL_TREE
;
3962 enum machine_mode mode
;
3965 while (POINTER_TYPE_P (type
)
3966 || TREE_CODE (type
) == FUNCTION_TYPE
3967 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
3968 type
= TREE_TYPE (type
);
3970 mode
= TYPE_MODE (type
);
3972 unsigned_p
= TYPE_UNSIGNED (type
);
3976 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3979 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3982 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3985 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3988 result
= V4SF_type_node
;
3991 result
= V2DF_type_node
;
3997 /* Propagate qualifiers attached to the element type
3998 onto the vector type. */
3999 if (result
&& result
!= type
&& TYPE_QUALS (type
))
4000 result
= build_qualified_type (result
, TYPE_QUALS (type
));
4002 *no_add_attrs
= true; /* No need to hang on to the attribute. */
4005 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name
));
4007 *node
= lang_hooks
.types
.reconstruct_complex_type (*node
, result
);
4012 /* Return nonzero if FUNC is a naked function.
   FUNC is first tested against FUNCTION_DECL.  The naked attribute is
   then looked up by name in DECL_ATTRIBUTES (FUNC), and the result is
   whether that lookup produced something other than NULL_TREE.
   NOTE(review): the statement guarded by the FUNCTION_DECL test is not
   visible in this listing; confirm what happens for other tree codes. */
4014 spu_naked_function_p (tree func
)
4018 if (TREE_CODE (func
) != FUNCTION_DECL
)
4021 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
4022 return a
!= NULL_TREE
;
/* Return the offset between register FROM and register TO for the
   register pairs tested below.  The terms that enter the sums are the
   saved register area size (spu_saved_regs_size), the frame size
   (get_frame_size), the outgoing argument area size and
   STACK_POINTER_OFFSET.
   NOTE(review): the declaration and initial value of sp_offset, the
   value returned for FRAME_POINTER_REGNUM -> HARD_FRAME_POINTER_REGNUM,
   and the handling of any other pair are not visible in this listing;
   confirm against the full file. */
4026 spu_initial_elimination_offset (int from
, int to
)
4028 int saved_regs_size
= spu_saved_regs_size ();
/* sp_offset becomes STACK_POINTER_OFFSET when the function is not a
   leaf, has outgoing arguments, has a nonzero frame size, or saves
   registers. */
4030 if (!current_function_is_leaf
|| current_function_outgoing_args_size
4031 || get_frame_size () || saved_regs_size
)
4032 sp_offset
= STACK_POINTER_OFFSET
;
4033 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
4034 return (sp_offset
+ current_function_outgoing_args_size
);
4035 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4037 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
/* Note that STACK_POINTER_OFFSET is added here in addition to
   sp_offset. */
4038 return sp_offset
+ current_function_outgoing_args_size
4039 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
4040 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
4041 return get_frame_size () + saved_regs_size
+ sp_offset
;
4046 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
4048 enum machine_mode mode
= TYPE_MODE (type
);
4049 int byte_size
= ((mode
== BLKmode
)
4050 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4052 /* Make sure small structs are left justified in a register. */
4053 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
4054 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
4056 enum machine_mode smode
;
4059 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
4060 int n
= byte_size
/ UNITS_PER_WORD
;
4061 v
= rtvec_alloc (nregs
);
4062 for (i
= 0; i
< n
; i
++)
4064 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
4065 gen_rtx_REG (TImode
,
4068 GEN_INT (UNITS_PER_WORD
* i
));
4069 byte_size
-= UNITS_PER_WORD
;
4077 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
4079 gen_rtx_EXPR_LIST (VOIDmode
,
4080 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
4081 GEN_INT (UNITS_PER_WORD
* n
));
4083 return gen_rtx_PARALLEL (mode
, v
);
4085 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
4089 spu_function_arg (CUMULATIVE_ARGS cum
,
4090 enum machine_mode mode
,
4091 tree type
, int named ATTRIBUTE_UNUSED
)
4095 if (cum
>= MAX_REGISTER_ARGS
)
4098 byte_size
= ((mode
== BLKmode
)
4099 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
4101 /* The ABI does not allow parameters to be passed partially in
4102 reg and partially in stack. */
4103 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
4106 /* Make sure small structs are left justified in a register. */
4107 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
4108 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
4110 enum machine_mode smode
;
4114 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
4115 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
4116 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
4118 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
4121 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
4124 /* Variable sized types are passed by reference.
   The result is true exactly when TYPE is non-null and TYPE_SIZE (TYPE)
   is not an INTEGER_CST.  CUM, MODE and NAMED are unused. */
4126 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
4127 enum machine_mode mode ATTRIBUTE_UNUSED
,
4128 const_tree type
, bool named ATTRIBUTE_UNUSED
)
4130 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
4136 /* Create and return the va_list datatype.
4138 On SPU, va_list is an array type equivalent to
4140 typedef struct __va_list_tag
{
4142 void *__args __attribute__((__aligned__(16)));
4143 void *__skip __attribute__((__aligned__(16)));
} va_list[1];
4147 where __args points to the arg that will be returned by the next
4148 va_arg(), and __skip points to the previous stack frame such that
4149 when __args == __skip we should advance __args by 32 bytes. */
4151 spu_build_builtin_va_list (void)
4153 tree f_args
, f_skip
, record
, type_decl
;
4156 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
4159 build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
4161 f_args
= build_decl (FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
4162 f_skip
= build_decl (FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
4164 DECL_FIELD_CONTEXT (f_args
) = record
;
4165 DECL_ALIGN (f_args
) = 128;
4166 DECL_USER_ALIGN (f_args
) = 1;
4168 DECL_FIELD_CONTEXT (f_skip
) = record
;
4169 DECL_ALIGN (f_skip
) = 128;
4170 DECL_USER_ALIGN (f_skip
) = 1;
4172 TREE_CHAIN (record
) = type_decl
;
4173 TYPE_NAME (record
) = type_decl
;
4174 TYPE_FIELDS (record
) = f_args
;
4175 TREE_CHAIN (f_args
) = f_skip
;
4177 /* We know this is being padded and we want it to be. It is an internal
4178 type so hide the warnings from the user. */
4180 warn_padded
= false;
4182 layout_type (record
);
4186 /* The correct type is an array type of one element. */
4187 return build_array_type (record
, build_index_type (size_zero_node
));
4190 /* Implement va_start by filling the va_list structure VALIST.
4191 NEXTARG points to the first anonymous stack argument.
4193 The following global variables are used to initialize
4194 the va_list structure:
4196 current_function_args_info;
4197 the CUMULATIVE_ARGS for this function
4199 current_function_arg_offset_rtx:
4200 holds the offset of the first anonymous stack argument
4201 (relative to the virtual arg pointer). */
4204 spu_va_start (tree valist
, rtx nextarg
)
4206 tree f_args
, f_skip
;
4209 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4210 f_skip
= TREE_CHAIN (f_args
);
4212 valist
= build_va_arg_indirect_ref (valist
);
4214 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4216 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4218 /* Find the __args area. */
4219 t
= make_tree (TREE_TYPE (args
), nextarg
);
4220 if (current_function_pretend_args_size
> 0)
4221 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (args
), t
,
4222 size_int (-STACK_POINTER_OFFSET
));
4223 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (args
), args
, t
);
4224 TREE_SIDE_EFFECTS (t
) = 1;
4225 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4227 /* Find the __skip area. */
4228 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
4229 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (skip
), t
,
4230 size_int (current_function_pretend_args_size
4231 - STACK_POINTER_OFFSET
));
4232 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (skip
), skip
, t
);
4233 TREE_SIDE_EFFECTS (t
) = 1;
4234 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
4237 /* Gimplify va_arg by updating the va_list structure
4238 VALIST as required to retrieve an argument of type
4239 TYPE, and returning that argument.
4241 ret = va_arg(VALIST, TYPE);
4243 generates code equivalent to:
4245 paddedsize = (sizeof(TYPE) + 15) & -16;
4246 if (VALIST.__args + paddedsize > VALIST.__skip
4247 && VALIST.__args <= VALIST.__skip)
4248 addr = VALIST.__skip + 32;
else
4250 addr = VALIST.__args;
4251 VALIST.__args = addr + paddedsize;
4252 ret = *(TYPE *)addr;
*/
4255 spu_gimplify_va_arg_expr (tree valist
, tree type
, tree
* pre_p
,
4256 tree
* post_p ATTRIBUTE_UNUSED
)
4258 tree f_args
, f_skip
;
4260 HOST_WIDE_INT size
, rsize
;
4261 tree paddedsize
, addr
, tmp
;
4262 bool pass_by_reference_p
;
4264 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
4265 f_skip
= TREE_CHAIN (f_args
);
4267 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
4269 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
4271 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
4273 addr
= create_tmp_var (ptr_type_node
, "va_arg");
4274 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
4276 /* if an object is dynamically sized, a pointer to it is passed
4277 instead of the object itself. */
4278 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
4280 if (pass_by_reference_p
)
4281 type
= build_pointer_type (type
);
4282 size
= int_size_in_bytes (type
);
4283 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
4285 /* build conditional expression to calculate addr. The expression
4286 will be gimplified later. */
4287 paddedsize
= size_int (rsize
);
4288 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, args
, paddedsize
);
4289 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
4290 build2 (GT_EXPR
, boolean_type_node
, tmp
, skip
),
4291 build2 (LE_EXPR
, boolean_type_node
, args
, skip
));
4293 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
4294 build2 (POINTER_PLUS_EXPR
, ptr_type_node
, skip
,
4295 size_int (32)), args
);
4297 tmp
= build2 (GIMPLE_MODIFY_STMT
, ptr_type_node
, addr
, tmp
);
4298 gimplify_and_add (tmp
, pre_p
);
4300 /* update VALIST.__args */
4301 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
4302 tmp
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (args
), args
, tmp
);
4303 gimplify_and_add (tmp
, pre_p
);
4305 addr
= fold_convert (build_pointer_type (type
), addr
);
4307 if (pass_by_reference_p
)
4308 addr
= build_va_arg_indirect_ref (addr
);
4310 return build_va_arg_indirect_ref (addr
);
4313 /* Save parameter registers starting with the register that corresponds
4314 to the first unnamed parameter. If the first unnamed parameter is
4315 in the stack then save no registers. Set pretend_args_size to the
4316 amount of space needed to save the registers. */
4318 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
4319 tree type
, int *pretend_size
, int no_rtl
)
4328 /* cum currently points to the last named argument, we want to
4329 start at the next argument. */
4330 FUNCTION_ARG_ADVANCE (ncum
, mode
, type
, 1);
4332 offset
= -STACK_POINTER_OFFSET
;
4333 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
4335 tmp
= gen_frame_mem (V4SImode
,
4336 plus_constant (virtual_incoming_args_rtx
,
4338 emit_move_insn (tmp
,
4339 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
4342 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
/* Mark PIC_OFFSET_TABLE_REGNUM as both a fixed register and a call-used
   register.
   NOTE(review): original lines 4348-4350 are absent from this listing;
   presumably a condition guards these two stores -- confirm against the
   full file. */
4347 spu_conditional_register_usage (void)
4351 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4352 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
4356 /* This is called any time we inspect the alignment of a register for addresses. */
4359 reg_aligned_for_addr (rtx x
, int aligned
)
4362 REGNO (x
) < FIRST_PSEUDO_REGISTER
? ORIGINAL_REGNO (x
) : REGNO (x
);
4365 return REGNO_POINTER_ALIGN (regno
) >= 128;
4368 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4369 into its SYMBOL_REF_FLAGS.
   The generic work is delegated to default_encode_section_info with the
   same DECL, RTL and FIRST arguments; the SPU-specific flag is added
   afterwards. */
4371 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
4373 default_encode_section_info (decl
, rtl
, first
);
4375 /* If a variable has a forced alignment to < 16 bytes, mark it with
4376 SYMBOL_FLAG_ALIGN1.
   Forced means DECL_USER_ALIGN is set; the limit is tested as
   DECL_ALIGN < 128.  The flag is set on XEXP (rtl, 0). */
4377 if (TREE_CODE (decl
) == VAR_DECL
4378 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
4379 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
4382 /* Return TRUE if we are certain the mem refers to a complete object
4383 which is both 16-byte aligned and padded to a 16-byte boundary. This
4384 would make it safe to store with a single instruction.
4385 We guarantee the alignment and padding for static objects by aligning
4386 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4387 FIXME: We currently cannot guarantee this for objects on the stack
4388 because assign_parm_setup_stack calls assign_stack_local with the
4389 alignment of the parameter mode and in that case the alignment never
4390 gets adjusted by LOCAL_ALIGNMENT. */
4392 store_with_one_insn_p (rtx mem
)
4394 enum machine_mode mode
= GET_MODE (mem
);
4395 rtx addr
= XEXP (mem
, 0);
4396 if (mode
== BLKmode
)
4398 if (GET_MODE_SIZE (mode
) >= 16)
4400 /* Only static objects. */
4401 if (GET_CODE (addr
) == SYMBOL_REF
)
4403 /* We use the associated declaration to make sure the access is
4404 referring to the whole object.
4405 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4406 if it is necessary. Will there be cases where one exists, and
4407 the other does not? Will there be cases where both exist, but
4408 have different types? */
4409 tree decl
= MEM_EXPR (mem
);
4411 && TREE_CODE (decl
) == VAR_DECL
4412 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4414 decl
= SYMBOL_REF_DECL (addr
);
4416 && TREE_CODE (decl
) == VAR_DECL
4417 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
4423 /* Return 1 when the address is not valid for a simple load and store as
4424 required by the '_mov*' patterns. We could make this less strict
4425 for loads, but we prefer mem's to look the same so they are more
4426 likely to be merged.
   The condition below holds for a MEM whose mode is narrower than 16
   bytes and which is either narrower than 4 bytes or accepted by neither
   store_with_one_insn_p nor mem_is_padded_component_ref.
   NOTE(review): the return statements are not visible in this listing;
   the mapping of the condition to the returned value follows the comment
   above and should be confirmed against the full file. */
4428 address_needs_split (rtx mem
)
4430 if (GET_MODE_SIZE (GET_MODE (mem
)) < 16
4431 && (GET_MODE_SIZE (GET_MODE (mem
)) < 4
4432 || !(store_with_one_insn_p (mem
)
4433 || mem_is_padded_component_ref (mem
))))
4439 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
4442 rtx cache_fetch_dirty
;
4443 int ea_alias_set
= -1;
4445 /* MEM is known to be an __ea qualified memory access. Emit a call to
4446 fetch the ppu memory to local store, and return its address in local store. */
4450 ea_load_store (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4454 rtx ndirty
= GEN_INT (GET_MODE_SIZE (GET_MODE (mem
)));
4455 if (!cache_fetch_dirty
)
4456 cache_fetch_dirty
= init_one_libfunc ("__cache_fetch_dirty");
4457 emit_library_call_value (cache_fetch_dirty
, data_addr
, LCT_NORMAL
, Pmode
,
4458 2, ea_addr
, EAmode
, ndirty
, SImode
);
4463 cache_fetch
= init_one_libfunc ("__cache_fetch");
4464 emit_library_call_value (cache_fetch
, data_addr
, LCT_NORMAL
, Pmode
,
4465 1, ea_addr
, EAmode
);
4469 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4470 dirty bit marking, inline.
4472 The cache control data structure is an array of
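4474 struct __cache_tag_array
{
4476 unsigned int tag_lo[4];
4477 unsigned int tag_hi[4];
4478 void *data_pointer[4];
int reserved[4]; (16 bytes; member name assumed, size implied by dirty_bits starting at offset 0x40)
4480 vector unsigned short dirty_bits[4];
} */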
4484 ea_load_store_inline (rtx mem
, bool is_store
, rtx ea_addr
, rtx data_addr
)
4488 rtx tag_size_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array_size");
4489 rtx tag_arr_sym
= gen_rtx_SYMBOL_REF (Pmode
, "__cache_tag_array");
4490 rtx index_mask
= gen_reg_rtx (SImode
);
4491 rtx tag_arr
= gen_reg_rtx (Pmode
);
4492 rtx splat_mask
= gen_reg_rtx (TImode
);
4493 rtx splat
= gen_reg_rtx (V4SImode
);
4494 rtx splat_hi
= NULL_RTX
;
4495 rtx tag_index
= gen_reg_rtx (Pmode
);
4496 rtx block_off
= gen_reg_rtx (SImode
);
4497 rtx tag_addr
= gen_reg_rtx (Pmode
);
4498 rtx tag
= gen_reg_rtx (V4SImode
);
4499 rtx cache_tag
= gen_reg_rtx (V4SImode
);
4500 rtx cache_tag_hi
= NULL_RTX
;
4501 rtx cache_ptrs
= gen_reg_rtx (TImode
);
4502 rtx cache_ptrs_si
= gen_reg_rtx (SImode
);
4503 rtx tag_equal
= gen_reg_rtx (V4SImode
);
4504 rtx tag_equal_hi
= NULL_RTX
;
4505 rtx tag_eq_pack
= gen_reg_rtx (V4SImode
);
4506 rtx tag_eq_pack_si
= gen_reg_rtx (SImode
);
4507 rtx eq_index
= gen_reg_rtx (SImode
);
4508 rtx bcomp
, hit_label
, hit_ref
, cont_label
, insn
;
4510 if (spu_ea_model
!= 32)
4512 splat_hi
= gen_reg_rtx (V4SImode
);
4513 cache_tag_hi
= gen_reg_rtx (V4SImode
);
4514 tag_equal_hi
= gen_reg_rtx (V4SImode
);
4517 emit_move_insn (index_mask
, plus_constant (tag_size_sym
, -128));
4518 emit_move_insn (tag_arr
, tag_arr_sym
);
4519 v
= 0x0001020300010203LL
;
4520 emit_move_insn (splat_mask
, immed_double_const (v
, v
, TImode
));
4521 ea_addr_si
= ea_addr
;
4522 if (spu_ea_model
!= 32)
4523 ea_addr_si
= convert_to_mode (SImode
, ea_addr
, 1);
4525 /* tag_index = ea_addr & (tag_array_size - 128) */
4526 emit_insn (gen_andsi3 (tag_index
, ea_addr_si
, index_mask
));
4528 /* splat ea_addr to all 4 slots. */
4529 emit_insn (gen_shufb (splat
, ea_addr_si
, ea_addr_si
, splat_mask
));
4530 /* Similarly for high 32 bits of ea_addr. */
4531 if (spu_ea_model
!= 32)
4532 emit_insn (gen_shufb (splat_hi
, ea_addr
, ea_addr
, splat_mask
));
4534 /* block_off = ea_addr & 127 */
4535 emit_insn (gen_andsi3 (block_off
, ea_addr_si
, spu_const (SImode
, 127)));
4537 /* tag_addr = tag_arr + tag_index */
4538 emit_insn (gen_addsi3 (tag_addr
, tag_arr
, tag_index
));
4540 /* Read cache tags. */
4541 emit_move_insn (cache_tag
, gen_rtx_MEM (V4SImode
, tag_addr
));
4542 if (spu_ea_model
!= 32)
4543 emit_move_insn (cache_tag_hi
, gen_rtx_MEM (V4SImode
,
4544 plus_constant (tag_addr
, 16)));
4546 /* tag = ea_addr & -128 */
4547 emit_insn (gen_andv4si3 (tag
, splat
, spu_const (V4SImode
, -128)));
4549 /* Read all four cache data pointers. */
4550 emit_move_insn (cache_ptrs
, gen_rtx_MEM (TImode
,
4551 plus_constant (tag_addr
, 32)));
4554 emit_insn (gen_ceq_v4si (tag_equal
, tag
, cache_tag
));
4555 if (spu_ea_model
!= 32)
4557 emit_insn (gen_ceq_v4si (tag_equal_hi
, splat_hi
, cache_tag_hi
));
4558 emit_insn (gen_andv4si3 (tag_equal
, tag_equal
, tag_equal_hi
));
4561 /* At most one of the tags compare equal, so tag_equal has one
4562 32-bit slot set to all 1's, with the other slots all zero.
4563 gbb picks off low bit from each byte in the 128-bit registers,
4564 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming we have a hit. */
4566 emit_insn (gen_spu_gbb (tag_eq_pack
, spu_gen_subreg (V16QImode
, tag_equal
)));
4567 emit_insn (gen_spu_convert (tag_eq_pack_si
, tag_eq_pack
));
4569 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4570 emit_insn (gen_clzsi2 (eq_index
, tag_eq_pack_si
));
4572 /* Allowing us to rotate the corresponding cache data pointer to slot0.
4573 (rotating eq_index mod 16 bytes). */
4574 emit_insn (gen_rotqby_ti (cache_ptrs
, cache_ptrs
, eq_index
));
4575 emit_insn (gen_spu_convert (cache_ptrs_si
, cache_ptrs
));
4577 /* Add block offset to form final data address. */
4578 emit_insn (gen_addsi3 (data_addr
, cache_ptrs_si
, block_off
));
4580 /* Check that we did hit. */
4581 hit_label
= gen_label_rtx ();
4582 hit_ref
= gen_rtx_LABEL_REF (VOIDmode
, hit_label
);
4583 bcomp
= gen_rtx_NE (SImode
, tag_eq_pack_si
, const0_rtx
);
4584 insn
= emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
4585 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
4587 /* Say that this branch is very likely to happen. */
4588 v
= REG_BR_PROB_BASE
- REG_BR_PROB_BASE
/ 100 - 1;
4590 = gen_rtx_EXPR_LIST (REG_BR_PROB
, GEN_INT (v
), REG_NOTES (insn
));
4592 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4593 cont_label
= gen_label_rtx ();
4594 emit_jump_insn (gen_jump (cont_label
));
4597 emit_label (hit_label
);
4602 rtx dirty_bits
= gen_reg_rtx (TImode
);
4603 rtx dirty_off
= gen_reg_rtx (SImode
);
4604 rtx dirty_128
= gen_reg_rtx (TImode
);
4605 rtx neg_block_off
= gen_reg_rtx (SImode
);
4607 /* Set up mask with one dirty bit per byte of the mem we are
4608 writing, starting from top bit. */
4610 v
<<= (128 - GET_MODE_SIZE (GET_MODE (mem
))) & 63;
4611 if ((128 - GET_MODE_SIZE (GET_MODE (mem
))) >= 64)
4616 emit_move_insn (dirty_bits
, immed_double_const (v
, v_hi
, TImode
));
4618 /* Form index into cache dirty_bits. eq_index is one of
4619 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4620 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4621 offset to each of the four dirty_bits elements. */
4622 emit_insn (gen_ashlsi3 (dirty_off
, eq_index
, spu_const (SImode
, 2)));
4624 emit_insn (gen_spu_lqx (dirty_128
, tag_addr
, dirty_off
));
4626 /* Rotate bit mask to proper bit. */
4627 emit_insn (gen_negsi2 (neg_block_off
, block_off
));
4628 emit_insn (gen_rotqbybi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4629 emit_insn (gen_rotqbi_ti (dirty_bits
, dirty_bits
, neg_block_off
));
4631 /* Or in the new dirty bits. */
4632 emit_insn (gen_iorti3 (dirty_128
, dirty_bits
, dirty_128
));
4635 emit_insn (gen_spu_stqx (dirty_128
, tag_addr
, dirty_off
));
4638 emit_label (cont_label
);
/* Rewrite MEM so that it is addressed through DATA_ADDR, a fresh Pmode
   register, instead of its original address.  The original address
   XEXP (mem, 0) is forced into an EAmode register and handed, together
   with IS_STORE and DATA_ADDR, to ea_load_store or ea_load_store_inline.
   The rewritten MEM is then given the alias set ea_alias_set.
   NOTE(review): the declaration of ea_addr, the else that presumably
   separates the two calls, and the return statement are not visible in
   this listing; confirm against the full file. */
4642 expand_ea_mem (rtx mem
, bool is_store
)
4645 rtx data_addr
= gen_reg_rtx (Pmode
);
4647 ea_addr
= force_reg (EAmode
, XEXP (mem
, 0));
/* The out-of-line call is chosen when optimizing for size or not
   optimizing at all. */
4648 if (optimize_size
|| optimize
== 0)
4649 ea_load_store (mem
, is_store
, ea_addr
, data_addr
);
4651 ea_load_store_inline (mem
, is_store
, ea_addr
, data_addr
);
4653 mem
= change_address (mem
, VOIDmode
, data_addr
);
/* The alias set is allocated on first use; -1 means not yet created. */
4655 if (ea_alias_set
== -1)
4656 ea_alias_set
= new_alias_set ();
/* NOTE(review): the alias set is first reset to 0 and only then set to
   ea_alias_set; the reason for the two-step assignment is not stated
   here. */
4657 set_mem_alias_set (mem
, 0);
4658 set_mem_alias_set (mem
, ea_alias_set
);
4663 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
4665 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
4668 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
4670 rtx from
= SUBREG_REG (ops
[1]);
4671 enum machine_mode imode
= GET_MODE (from
);
4673 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
4674 && GET_MODE_CLASS (imode
) == MODE_INT
4675 && subreg_lowpart_p (ops
[1]));
4677 if (GET_MODE_SIZE (imode
) < 4)
4679 from
= gen_rtx_SUBREG (SImode
, from
, 0);
4683 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
4685 enum insn_code icode
= convert_optab_handler (trunc_optab
, mode
, imode
)->insn_code
;
4686 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
4689 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
4693 /* At least one of the operands needs to be a register. */
4694 if ((reload_in_progress
| reload_completed
) == 0
4695 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
4697 rtx temp
= force_reg (mode
, ops
[1]);
4698 emit_move_insn (ops
[0], temp
);
4701 if (reload_in_progress
|| reload_completed
)
4703 if (CONSTANT_P (ops
[1]))
4704 return spu_split_immediate (ops
);
4711 if (MEM_ADDR_SPACE (ops
[0]))
4712 ops
[0] = expand_ea_mem (ops
[0], true);
4714 else if (MEM_P (ops
[1]))
4716 if (MEM_ADDR_SPACE (ops
[1]))
4717 ops
[1] = expand_ea_mem (ops
[1], false);
4719 /* Catch the SImode immediates greater than 0x7fffffff, and sign extend them. */
4721 if (GET_CODE (ops
[1]) == CONST_INT
)
4723 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
4724 if (val
!= INTVAL (ops
[1]))
4726 emit_move_insn (ops
[0], GEN_INT (val
));
4735 spu_split_load (rtx
* ops
)
4737 enum machine_mode mode
= GET_MODE (ops
[0]);
4738 rtx addr
, load
, rot
, mem
, p0
, p1
;
4741 addr
= XEXP (ops
[1], 0);
4742 gcc_assert (GET_CODE (addr
) != AND
);
4744 if (!address_needs_split (ops
[1]))
4746 addr
= XEXP (ops
[1], 0);
4747 if (spu_legitimate_address (mode
, addr
, 0, 1))
4749 ops
[1] = change_address (ops
[1], VOIDmode
, force_reg (Pmode
, addr
));
4750 emit_move_insn (ops
[0], ops
[1]);
4757 if (MEM_ALIGN (ops
[1]) >= 128)
4758 /* Address is already aligned; simply perform a TImode load. */;
4759 else if (GET_CODE (addr
) == PLUS
)
/*
4762 aligned reg + aligned reg => lqx
4763 aligned reg + unaligned reg => lqx, rotqby
4764 aligned reg + aligned const => lqd
4765 aligned reg + unaligned const => lqd, rotqbyi
4766 unaligned reg + aligned reg => lqx, rotqby
4767 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4768 unaligned reg + aligned const => lqd, rotqby
4769 unaligned reg + unaligned const -> not allowed by legitimate address
*/
4771 p0
= XEXP (addr
, 0);
4772 p1
= XEXP (addr
, 1);
4773 if (!reg_aligned_for_addr (p0
, 1))
4775 if (GET_CODE (p1
) == REG
&& !reg_aligned_for_addr (p1
, 1))
4777 rot
= gen_reg_rtx (SImode
);
4778 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4780 else if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4783 && INTVAL (p1
) * BITS_PER_UNIT
< REG_ALIGN (p0
))
4785 rot
= gen_reg_rtx (SImode
);
4786 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4791 rtx x
= gen_reg_rtx (SImode
);
4792 emit_move_insn (x
, p1
);
4793 if (!spu_arith_operand (p1
, SImode
))
4795 rot
= gen_reg_rtx (SImode
);
4796 emit_insn (gen_addsi3 (rot
, p0
, p1
));
4797 addr
= gen_rtx_PLUS (Pmode
, p0
, x
);
4805 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
4807 rot_amt
= INTVAL (p1
) & 15;
4808 if (INTVAL (p1
) & -16)
4810 p1
= GEN_INT (INTVAL (p1
) & -16);
4811 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4816 else if (GET_CODE (p1
) == REG
&& !reg_aligned_for_addr (p1
, 1))
4820 else if (GET_CODE (addr
) == REG
)
4822 if (!reg_aligned_for_addr (addr
, 1))
4825 else if (GET_CODE (addr
) == CONST
)
4827 if (GET_CODE (XEXP (addr
, 0)) == PLUS
4828 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4829 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4831 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4833 addr
= gen_rtx_CONST (Pmode
,
4834 gen_rtx_PLUS (Pmode
,
4835 XEXP (XEXP (addr
, 0), 0),
4836 GEN_INT (rot_amt
& -16)));
4838 addr
= XEXP (XEXP (addr
, 0), 0);
4842 rot
= gen_reg_rtx (Pmode
);
4843 emit_move_insn (rot
, addr
);
4846 else if (GET_CODE (addr
) == CONST_INT
)
4848 rot_amt
= INTVAL (addr
);
4849 addr
= GEN_INT (rot_amt
& -16);
4851 else if (!ALIGNED_SYMBOL_REF_P (addr
))
4853 rot
= gen_reg_rtx (Pmode
);
4854 emit_move_insn (rot
, addr
);
4857 if (GET_MODE_SIZE (mode
) < 4)
4858 rot_amt
+= GET_MODE_SIZE (mode
) - 4;
4864 rtx x
= gen_reg_rtx (SImode
);
4865 emit_insn (gen_addsi3 (x
, rot
, GEN_INT (rot_amt
)));
4870 /* If the source is properly aligned, we don't need to split this insn into
4871 a TImode load plus a _spu_convert. However, we want to perform the split
4872 anyway when optimizing to make the MEMs look the same as those used for
4873 stores so they are more easily merged. When *not* optimizing, that will
4874 not happen anyway, so we prefer to avoid generating the _spu_convert. */
4875 if (!rot
&& !rot_amt
&& !optimize
)
4878 load
= gen_reg_rtx (TImode
);
4880 mem
= change_address (ops
[1], TImode
, copy_rtx (addr
));
4882 emit_insn (gen_movti (load
, mem
));
4885 emit_insn (gen_rotqby_ti (load
, load
, rot
));
4887 emit_insn (gen_rotlti3 (load
, load
, GEN_INT (rot_amt
* 8)));
4889 emit_insn (gen_spu_convert (ops
[0], load
));
4894 spu_split_store (rtx
* ops
)
4896 enum machine_mode mode
= GET_MODE (ops
[0]);
4898 rtx addr
, p0
, p1
, p1_lo
, smem
;
4902 if (!address_needs_split (ops
[0]))
4904 addr
= XEXP (ops
[0], 0);
4905 if (spu_legitimate_address (mode
, addr
, 0, 1))
4907 ops
[0] = change_address (ops
[0], VOIDmode
, force_reg (Pmode
, addr
));
4908 emit_move_insn (ops
[0], ops
[1]);
4912 addr
= XEXP (ops
[0], 0);
4913 gcc_assert (GET_CODE (addr
) != AND
);
4915 if (GET_CODE (addr
) == PLUS
)
/*
4918 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4919 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4920 aligned reg + aligned const => lqd, c?d, shuf, stqx
4921 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4922 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4923 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4924 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4925 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
*/
4928 p0
= XEXP (addr
, 0);
4929 p1
= p1_lo
= XEXP (addr
, 1);
4930 if (GET_CODE (p0
) == REG
&& GET_CODE (p1
) == CONST_INT
)
4932 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
4933 if (reg_aligned_for_addr (p0
, 1))
4935 p1
= GEN_INT (INTVAL (p1
) & -16);
4936 if (p1
== const0_rtx
)
4939 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
4943 rtx x
= gen_reg_rtx (SImode
);
4944 emit_move_insn (x
, p1
);
4945 addr
= gen_rtx_PLUS (SImode
, p0
, x
);
4949 else if (GET_CODE (addr
) == REG
)
4953 p1
= p1_lo
= const0_rtx
;
4958 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
4959 p1
= 0; /* aform doesn't use p1 */
4961 if (ALIGNED_SYMBOL_REF_P (addr
))
4963 else if (GET_CODE (addr
) == CONST
4964 && GET_CODE (XEXP (addr
, 0)) == PLUS
4965 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
4966 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
4968 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
4970 addr
= gen_rtx_CONST (Pmode
,
4971 gen_rtx_PLUS (Pmode
,
4972 XEXP (XEXP (addr
, 0), 0),
4973 GEN_INT (v
& -16)));
4975 addr
= XEXP (XEXP (addr
, 0), 0);
4976 p1_lo
= GEN_INT (v
& 15);
4978 else if (GET_CODE (addr
) == CONST_INT
)
4980 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
4981 addr
= GEN_INT (INTVAL (addr
) & -16);
4985 p1_lo
= gen_reg_rtx (SImode
);
4986 emit_move_insn (p1_lo
, addr
);
4990 reg
= gen_reg_rtx (TImode
);
4992 scalar
= store_with_one_insn_p (ops
[0]);
4995 /* We could copy the flags from the ops[0] MEM to mem here,
4996 We don't because we want this load to be optimized away if
4997 possible, and copying the flags will prevent that in certain
4998 cases, e.g. consider the volatile flag. */
5000 rtx pat
= gen_reg_rtx (TImode
);
5001 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
5002 set_mem_alias_set (lmem
, 0);
5003 emit_insn (gen_movti (reg
, lmem
));
5005 if (!p0
|| reg_aligned_for_addr (p0
, 1))
5006 p0
= stack_pointer_rtx
;
5010 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
5011 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
5015 if (GET_CODE (ops
[1]) == REG
)
5016 emit_insn (gen_spu_convert (reg
, ops
[1]));
5017 else if (GET_CODE (ops
[1]) == SUBREG
)
5018 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
5023 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
5024 emit_insn (gen_ashlti3
5025 (reg
, reg
, GEN_INT (32 - GET_MODE_BITSIZE (mode
))));
5027 smem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
5028 /* We can't use the previous alias set because the memory has changed
5029 size and can potentially overlap objects of other types. */
5030 set_mem_alias_set (smem
, 0);
5032 emit_insn (gen_movti (smem
, reg
));
5036 /* Return TRUE if X is MEM which is a struct member reference
5037 and the member can safely be loaded and stored with a single
5038 instruction because it is padded. */
5040 mem_is_padded_component_ref (rtx x
)
5042 tree t
= MEM_EXPR (x
);
5044 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
5046 t
= TREE_OPERAND (t
, 1);
5047 if (!t
|| TREE_CODE (t
) != FIELD_DECL
5048 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
5050 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5051 r
= DECL_FIELD_CONTEXT (t
);
5052 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
5054 /* Make sure they are the same mode */
5055 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
5057 /* If there are no following fields then the field alignment assures
5058 the structure is padded to the alignment which means this field is padded too. */
5060 if (TREE_CHAIN (t
) == 0)
5062 /* If the following field is also aligned then this field will be padded. */
5065 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
5070 /* Parse the -mfixed-range= option string. */
5072 fix_range (const char *const_str
)
5075 char *str
, *dash
, *comma
;
5077 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
5078 REG2 are either register names or register numbers. The effect
5079 of this option is to mark the registers in the range from REG1 to
5080 REG2 as ``fixed'' so they won't be used by the compiler. */
5082 i
= strlen (const_str
);
5083 str
= (char *) alloca (i
+ 1);
5084 memcpy (str
, const_str
, i
+ 1);
5088 dash
= strchr (str
, '-');
5091 warning (0, "value of -mfixed-range must have form REG1-REG2");
5095 comma
= strchr (dash
+ 1, ',');
5099 first
= decode_reg_name (str
);
5102 warning (0, "unknown register name: %s", str
);
5106 last
= decode_reg_name (dash
+ 1);
5109 warning (0, "unknown register name: %s", dash
+ 1);
5117 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
5121 for (i
= first
; i
<= last
; ++i
)
5122 fixed_regs
[i
] = call_used_regs
[i
] = 1;
5132 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5133 can be generated using the fsmbi instruction. */
5135 fsmbi_const_p (rtx x
)
5139 /* We can always choose TImode for CONST_INT because the high bits
5140 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5141 enum immediate_class c
= classify_immediate (x
, TImode
);
5142 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
5147 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5148 can be generated using the cbd, chd, cwd or cdd instruction. */
5150 cpat_const_p (rtx x
, enum machine_mode mode
)
5154 enum immediate_class c
= classify_immediate (x
, mode
);
5155 return c
== IC_CPAT
;
5161 gen_cpat_const (rtx
* ops
)
5163 unsigned char dst
[16];
5164 int i
, offset
, shift
, isize
;
5165 if (GET_CODE (ops
[3]) != CONST_INT
5166 || GET_CODE (ops
[2]) != CONST_INT
5167 || (GET_CODE (ops
[1]) != CONST_INT
5168 && GET_CODE (ops
[1]) != REG
))
5170 if (GET_CODE (ops
[1]) == REG
5171 && (!REG_POINTER (ops
[1])
5172 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
5175 for (i
= 0; i
< 16; i
++)
5177 isize
= INTVAL (ops
[3]);
5180 else if (isize
== 2)
5184 offset
= (INTVAL (ops
[2]) +
5185 (GET_CODE (ops
[1]) ==
5186 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
5187 for (i
= 0; i
< isize
; i
++)
5188 dst
[offset
+ i
] = i
+ shift
;
5189 return array_to_constant (TImode
, dst
);
5192 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5193 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5194 than 16 bytes, the value is repeated across the rest of the array. */
5196 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
5201 memset (arr
, 0, 16);
5202 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
5203 if (GET_CODE (x
) == CONST_INT
5204 || (GET_CODE (x
) == CONST_DOUBLE
5205 && (mode
== SFmode
|| mode
== DFmode
)))
5207 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
5209 if (GET_CODE (x
) == CONST_DOUBLE
)
5210 val
= const_double_to_hwint (x
);
5213 first
= GET_MODE_SIZE (mode
) - 1;
5214 for (i
= first
; i
>= 0; i
--)
5216 arr
[i
] = val
& 0xff;
5219 /* Splat the constant across the whole array. */
5220 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
5223 j
= (j
== first
) ? 0 : j
+ 1;
5226 else if (GET_CODE (x
) == CONST_DOUBLE
)
5228 val
= CONST_DOUBLE_LOW (x
);
5229 for (i
= 15; i
>= 8; i
--)
5231 arr
[i
] = val
& 0xff;
5234 val
= CONST_DOUBLE_HIGH (x
);
5235 for (i
= 7; i
>= 0; i
--)
5237 arr
[i
] = val
& 0xff;
5241 else if (GET_CODE (x
) == CONST_VECTOR
)
5245 mode
= GET_MODE_INNER (mode
);
5246 units
= CONST_VECTOR_NUNITS (x
);
5247 for (i
= 0; i
< units
; i
++)
5249 elt
= CONST_VECTOR_ELT (x
, i
);
5250 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
5252 if (GET_CODE (elt
) == CONST_DOUBLE
)
5253 val
= const_double_to_hwint (elt
);
5256 first
= GET_MODE_SIZE (mode
) - 1;
5257 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
5259 for (j
= first
; j
>= 0; j
--)
5261 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
5271 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5272 smaller than 16 bytes, use the bytes that would represent that value
5273 in a register, e.g., for QImode return the value of arr[3]. */
5275 array_to_constant (enum machine_mode mode
, unsigned char arr
[16])
5277 enum machine_mode inner_mode
;
5279 int units
, size
, i
, j
, k
;
5282 if (GET_MODE_CLASS (mode
) == MODE_INT
5283 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
5285 j
= GET_MODE_SIZE (mode
);
5286 i
= j
< 4 ? 4 - j
: 0;
5287 for (val
= 0; i
< j
; i
++)
5288 val
= (val
<< 8) | arr
[i
];
5289 val
= trunc_int_for_mode (val
, mode
);
5290 return GEN_INT (val
);
5296 for (i
= high
= 0; i
< 8; i
++)
5297 high
= (high
<< 8) | arr
[i
];
5298 for (i
= 8, val
= 0; i
< 16; i
++)
5299 val
= (val
<< 8) | arr
[i
];
5300 return immed_double_const (val
, high
, TImode
);
5304 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5305 val
= trunc_int_for_mode (val
, SImode
);
5306 return hwint_to_const_double (SFmode
, val
);
5310 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
5312 val
|= (arr
[4] << 24) | (arr
[5] << 16) | (arr
[6] << 8) | arr
[7];
5313 return hwint_to_const_double (DFmode
, val
);
5316 if (!VECTOR_MODE_P (mode
))
5319 units
= GET_MODE_NUNITS (mode
);
5320 size
= GET_MODE_UNIT_SIZE (mode
);
5321 inner_mode
= GET_MODE_INNER (mode
);
5322 v
= rtvec_alloc (units
);
5324 for (k
= i
= 0; i
< units
; ++i
)
5327 for (j
= 0; j
< size
; j
++, k
++)
5328 val
= (val
<< 8) | arr
[k
];
5330 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
5331 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
5333 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
5338 return gen_rtx_CONST_VECTOR (mode
, v
);
5342 reloc_diagnostic (rtx x
)
5344 tree loc_decl
, decl
= 0;
5346 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
5349 if (GET_CODE (x
) == SYMBOL_REF
)
5350 decl
= SYMBOL_REF_DECL (x
);
5351 else if (GET_CODE (x
) == CONST
5352 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
5353 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
5355 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5356 if (decl
&& !DECL_P (decl
))
5359 /* We use last_assemble_variable_decl to get line information. It's
5360 not always going to be right and might not even be close, but will
5361 be right for the more common cases. */
5362 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
5365 loc_decl
= last_assemble_variable_decl
;
5367 /* The decl could be a string constant. */
5368 if (decl
&& DECL_P (decl
))
5369 msg
= "%Jcreating run-time relocation for %qD";
5371 msg
= "creating run-time relocation";
5373 if (TARGET_WARN_RELOC
)
5374 warning (0, msg
, loc_decl
, decl
);
5376 error (msg
, loc_decl
, decl
);
5379 /* Hook into assemble_integer so we can generate an error for run-time
5380 relocations. The SPU ABI disallows them. */
5382 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
5384 /* By default run-time relocations aren't supported, but we allow them
5385 in case users support it in their own run-time loader. And we provide
5386 a warning for those users that don't. */
5387 if ((GET_CODE (x
) == SYMBOL_REF
)
5388 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
5389 reloc_diagnostic (x
);
5391 return default_assemble_integer (x
, size
, aligned_p
);
/* Write to FILE the directive that makes NAME global: a ".global"
   directive followed by NAME as printed by assemble_name.  */
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputc ('\n', file);
}
5403 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
)
5405 enum machine_mode mode
= GET_MODE (x
);
5406 int cost
= COSTS_N_INSNS (2);
5408 /* Folding to a CONST_VECTOR will use extra space but there might
5409 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5410 only if it allows us to fold away multiple insns. Changing the cost
5411 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5412 because this cost will only be compared against a single insn.
5413 if (code == CONST_VECTOR)
5414 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5417 /* Use defaults for float operations. Not accurate but good enough. */
5420 *total
= COSTS_N_INSNS (13);
5425 *total
= COSTS_N_INSNS (6);
5431 if (satisfies_constraint_K (x
))
5433 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
5434 *total
= COSTS_N_INSNS (1);
5436 *total
= COSTS_N_INSNS (3);
5440 *total
= COSTS_N_INSNS (3);
5445 *total
= COSTS_N_INSNS (0);
5449 *total
= COSTS_N_INSNS (5);
5453 case FLOAT_TRUNCATE
:
5455 case UNSIGNED_FLOAT
:
5458 *total
= COSTS_N_INSNS (7);
5464 *total
= COSTS_N_INSNS (9);
5471 GET_CODE (XEXP (x
, 0)) ==
5472 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5473 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
5475 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
5477 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
5478 cost
= COSTS_N_INSNS (14);
5479 if ((val
& 0xffff) == 0)
5480 cost
= COSTS_N_INSNS (9);
5481 else if (val
> 0 && val
< 0x10000)
5482 cost
= COSTS_N_INSNS (11);
5491 *total
= COSTS_N_INSNS (20);
5498 *total
= COSTS_N_INSNS (4);
5501 if (XINT (x
, 1) == UNSPEC_CONVERT
)
5502 *total
= COSTS_N_INSNS (0);
5504 *total
= COSTS_N_INSNS (4);
5507 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5508 if (GET_MODE_CLASS (mode
) == MODE_INT
5509 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
5510 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
5511 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
5516 static enum machine_mode
5517 spu_unwind_word_mode (void)
5522 /* Decide whether we can make a sibling call to a function. DECL is the
5523 declaration of the function being targeted by the call and EXP is the
5524 CALL_EXPR representing the call. */
5526 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
5528 return decl
&& !TARGET_LARGE_MEM
;
5531 /* We need to correctly update the back chain pointer and the Available
5532 Stack Size (which is in the second slot of the sp register.) */
5534 spu_allocate_stack (rtx op0
, rtx op1
)
5537 rtx chain
= gen_reg_rtx (V4SImode
);
5538 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
5539 rtx sp
= gen_reg_rtx (V4SImode
);
5540 rtx splatted
= gen_reg_rtx (V4SImode
);
5541 rtx pat
= gen_reg_rtx (TImode
);
5543 /* copy the back chain so we can save it back again. */
5544 emit_move_insn (chain
, stack_bot
);
5546 op1
= force_reg (SImode
, op1
);
5548 v
= 0x1020300010203ll
;
5549 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
5550 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
5552 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
5553 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
5555 if (flag_stack_check
)
5557 rtx avail
= gen_reg_rtx(SImode
);
5558 rtx result
= gen_reg_rtx(SImode
);
5559 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
5560 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
5561 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
5564 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
5566 emit_move_insn (stack_bot
, chain
);
5568 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
5572 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5574 static unsigned char arr
[16] =
5575 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5576 rtx temp
= gen_reg_rtx (SImode
);
5577 rtx temp2
= gen_reg_rtx (SImode
);
5578 rtx temp3
= gen_reg_rtx (V4SImode
);
5579 rtx temp4
= gen_reg_rtx (V4SImode
);
5580 rtx pat
= gen_reg_rtx (TImode
);
5581 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5583 /* Restore the backchain from the first word, sp from the second. */
5584 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
5585 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
5587 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5589 /* Compute Available Stack Size for sp */
5590 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5591 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5593 /* Compute Available Stack Size for back chain */
5594 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
5595 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
5596 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
5598 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5599 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
5603 spu_init_libfuncs (void)
5605 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
5606 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
5607 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
5608 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
5609 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
5610 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
5611 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
5612 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
5613 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
5614 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
5615 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
5617 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
5618 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
5620 set_optab_libfunc (smul_optab
, TImode
, "__multi3");
5621 set_optab_libfunc (sdiv_optab
, TImode
, "__divti3");
5622 set_optab_libfunc (smod_optab
, TImode
, "__modti3");
5623 set_optab_libfunc (udiv_optab
, TImode
, "__udivti3");
5624 set_optab_libfunc (umod_optab
, TImode
, "__umodti3");
5625 set_optab_libfunc (udivmod_optab
, TImode
, "__udivmodti4");
5628 /* Make a subreg, stripping any existing subreg. We could possibly just
5629 call simplify_subreg, but in this case we know what we want. */
5631 spu_gen_subreg (enum machine_mode mode
, rtx x
)
5633 if (GET_CODE (x
) == SUBREG
)
5635 if (GET_MODE (x
) == mode
)
5637 return gen_rtx_SUBREG (mode
, x
, 0);
5641 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
5643 return (TYPE_MODE (type
) == BLKmode
5645 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
5646 || int_size_in_bytes (type
) >
5647 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
5650 /* Create the built-in types and functions */
/* Function codes: spu-builtins.def is included with DEF_BUILTIN
   expanding to just its fcode argument followed by a comma, so every
   entry of the .def file contributes one enumerator.  */
5652 enum spu_function_code
5654 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5655 #include "spu-builtins.def"
/* Description table with NUM_SPU_BUILTINS entries: the same .def file
   is included a second time, now with DEF_BUILTIN expanding to one
   initializer per entry whose last member is NULL_TREE.  */
5660 extern GTY(()) struct spu_builtin_description spu_builtins
[NUM_SPU_BUILTINS
];
5662 struct spu_builtin_description spu_builtins
[] = {
5663 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5664 {fcode, icode, name, type, params, NULL_TREE},
5665 #include "spu-builtins.def"
5670 spu_init_builtins (void)
5672 struct spu_builtin_description
*d
;
5675 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
5676 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
5677 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
5678 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
5679 V4SF_type_node
= build_vector_type (float_type_node
, 4);
5680 V2DF_type_node
= build_vector_type (double_type_node
, 2);
5682 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
5683 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
5684 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
5685 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
5687 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
5689 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
5690 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
5691 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
5692 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
5693 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
5694 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
5695 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
5696 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
5697 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
5698 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
5699 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
5700 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
5702 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
5703 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
5704 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
5705 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
5706 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
5707 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
5708 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
5709 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
5711 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
5712 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
5714 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
5716 spu_builtin_types
[SPU_BTI_PTR
] =
5717 build_pointer_type (build_qualified_type
5719 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
5721 /* For each builtin we build a new prototype. The tree code will make
5722 sure nodes are shared. */
5723 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
5726 char name
[64]; /* build_function will make a copy. */
5732 /* find last parm */
5733 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
5739 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
5741 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
5743 sprintf (name
, "__builtin_%s", d
->name
);
5745 add_builtin_function (name
, p
, END_BUILTINS
+ i
, BUILT_IN_MD
,
5747 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5748 TREE_READONLY (d
->fndecl
) = 1;
5753 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
5755 static unsigned char arr
[16] =
5756 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5758 rtx temp
= gen_reg_rtx (Pmode
);
5759 rtx temp2
= gen_reg_rtx (V4SImode
);
5760 rtx temp3
= gen_reg_rtx (V4SImode
);
5761 rtx pat
= gen_reg_rtx (TImode
);
5762 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
5764 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
5766 /* Restore the sp. */
5767 emit_move_insn (temp
, op1
);
5768 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
5770 /* Compute available stack size for sp. */
5771 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
5772 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
5774 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
5775 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
5779 spu_safe_dma (HOST_WIDE_INT channel
)
5781 return TARGET_SAFE_DMA
&& channel
>= 21 && channel
<= 27;
5785 spu_builtin_splats (rtx ops
[])
5787 enum machine_mode mode
= GET_MODE (ops
[0]);
5788 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
5790 unsigned char arr
[16];
5791 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
5792 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
5796 rtx reg
= gen_reg_rtx (TImode
);
5798 if (GET_CODE (ops
[1]) != REG
5799 && GET_CODE (ops
[1]) != SUBREG
)
5800 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
5806 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
5812 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
5817 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
5822 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
5828 emit_move_insn (reg
, shuf
);
5829 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
5834 spu_builtin_extract (rtx ops
[])
5836 enum machine_mode mode
;
5839 mode
= GET_MODE (ops
[1]);
5841 if (GET_CODE (ops
[2]) == CONST_INT
)
5846 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
5849 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
5852 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
5855 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
5858 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
5861 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
5869 from
= spu_gen_subreg (TImode
, ops
[1]);
5870 rot
= gen_reg_rtx (TImode
);
5871 tmp
= gen_reg_rtx (SImode
);
5876 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
5879 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
5880 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
5884 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
5888 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
5893 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
5895 emit_insn (gen_spu_convert (ops
[0], rot
));
5899 spu_builtin_insert (rtx ops
[])
5901 enum machine_mode mode
= GET_MODE (ops
[0]);
5902 enum machine_mode imode
= GET_MODE_INNER (mode
);
5903 rtx mask
= gen_reg_rtx (TImode
);
5906 if (GET_CODE (ops
[3]) == CONST_INT
)
5907 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
5910 offset
= gen_reg_rtx (SImode
);
5911 emit_insn (gen_mulsi3
5912 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
5915 (mask
, stack_pointer_rtx
, offset
,
5916 GEN_INT (GET_MODE_SIZE (imode
))));
5917 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
5921 spu_builtin_promote (rtx ops
[])
5923 enum machine_mode mode
, imode
;
5924 rtx rot
, from
, offset
;
5927 mode
= GET_MODE (ops
[0]);
5928 imode
= GET_MODE_INNER (mode
);
5930 from
= gen_reg_rtx (TImode
);
5931 rot
= spu_gen_subreg (TImode
, ops
[0]);
5933 emit_insn (gen_spu_convert (from
, ops
[1]));
5935 if (GET_CODE (ops
[2]) == CONST_INT
)
5937 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
5938 if (GET_MODE_SIZE (imode
) < 4)
5939 pos
+= 4 - GET_MODE_SIZE (imode
);
5940 offset
= GEN_INT (pos
& 15);
5944 offset
= gen_reg_rtx (SImode
);
5948 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
5951 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
5952 emit_insn (gen_addsi3 (offset
, offset
, offset
));
5956 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
5957 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
5961 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
5967 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
5971 spu_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
5973 rtx shuf
= gen_reg_rtx (V4SImode
);
5974 rtx insn
= gen_reg_rtx (V4SImode
);
5979 fnaddr
= force_reg (SImode
, fnaddr
);
5980 cxt
= force_reg (SImode
, cxt
);
5982 if (TARGET_LARGE_MEM
)
5984 rtx rotl
= gen_reg_rtx (V4SImode
);
5985 rtx mask
= gen_reg_rtx (V4SImode
);
5986 rtx bi
= gen_reg_rtx (SImode
);
5987 unsigned char shufa
[16] = {
5988 2, 3, 0, 1, 18, 19, 16, 17,
5989 0, 1, 2, 3, 16, 17, 18, 19
5991 unsigned char insna
[16] = {
5993 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
5995 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5998 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
5999 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
6001 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
6002 emit_insn (gen_rotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
6003 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
6004 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
6006 mem
= memory_address (Pmode
, tramp
);
6007 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
6009 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
6010 mem
= memory_address (Pmode
, plus_constant (tramp
, 16));
6011 emit_move_insn (gen_rtx_MEM (Pmode
, mem
), bi
);
6015 rtx scxt
= gen_reg_rtx (SImode
);
6016 rtx sfnaddr
= gen_reg_rtx (SImode
);
6017 unsigned char insna
[16] = {
6018 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
6024 shufc
= gen_reg_rtx (TImode
);
6025 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
6027 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6028 fits 18 bits and the last 4 are zeros. This will be true if
6029 the stack pointer is initialized to 0x3fff0 at program start,
6030 otherwise the ila instruction will be garbage. */
6032 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
6033 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
6035 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
6036 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
6037 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
6039 mem
= memory_address (Pmode
, tramp
);
6040 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
6043 emit_insn (gen_sync ());
6047 spu_expand_sign_extend (rtx ops
[])
6049 unsigned char arr
[16];
6050 rtx pat
= gen_reg_rtx (TImode
);
6053 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
6054 if (GET_MODE (ops
[1]) == QImode
)
6056 sign
= gen_reg_rtx (HImode
);
6057 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
6058 for (i
= 0; i
< 16; i
++)
6064 for (i
= 0; i
< 16; i
++)
6066 switch (GET_MODE (ops
[1]))
6069 sign
= gen_reg_rtx (SImode
);
6070 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
6072 arr
[last
- 1] = 0x02;
6075 sign
= gen_reg_rtx (SImode
);
6076 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
6077 for (i
= 0; i
< 4; i
++)
6078 arr
[last
- i
] = 3 - i
;
6081 sign
= gen_reg_rtx (SImode
);
6082 c
= gen_reg_rtx (SImode
);
6083 emit_insn (gen_spu_convert (c
, ops
[1]));
6084 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
6085 for (i
= 0; i
< 8; i
++)
6086 arr
[last
- i
] = 7 - i
;
6092 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
6093 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
6096 /* expand vector initialization. If there are any constant parts,
6097 load constant parts first. Then load any non-constant parts. */
6099 spu_expand_vector_init (rtx target
, rtx vals
)
6101 enum machine_mode mode
= GET_MODE (target
);
6102 int n_elts
= GET_MODE_NUNITS (mode
);
6104 bool all_same
= true;
6105 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
6108 first
= XVECEXP (vals
, 0, 0);
6109 for (i
= 0; i
< n_elts
; ++i
)
6111 x
= XVECEXP (vals
, 0, i
);
6112 if (!(CONST_INT_P (x
)
6113 || GET_CODE (x
) == CONST_DOUBLE
6114 || GET_CODE (x
) == CONST_FIXED
))
6118 if (first_constant
== NULL_RTX
)
6121 if (i
> 0 && !rtx_equal_p (x
, first
))
6125 /* if all elements are the same, use splats to repeat elements */
6128 if (!CONSTANT_P (first
)
6129 && !register_operand (first
, GET_MODE (x
)))
6130 first
= force_reg (GET_MODE (first
), first
);
6131 emit_insn (gen_spu_splats (target
, first
));
6135 /* load constant parts */
6136 if (n_var
!= n_elts
)
6140 emit_move_insn (target
,
6141 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
6145 rtx constant_parts_rtx
= copy_rtx (vals
);
6147 gcc_assert (first_constant
!= NULL_RTX
);
6148 /* fill empty slots with the first constant, this increases
6149 our chance of using splats in the recursive call below. */
6150 for (i
= 0; i
< n_elts
; ++i
)
6152 x
= XVECEXP (constant_parts_rtx
, 0, i
);
6153 if (!(CONST_INT_P (x
)
6154 || GET_CODE (x
) == CONST_DOUBLE
6155 || GET_CODE (x
) == CONST_FIXED
))
6156 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
6159 spu_expand_vector_init (target
, constant_parts_rtx
);
6163 /* load variable parts */
6166 rtx insert_operands
[4];
6168 insert_operands
[0] = target
;
6169 insert_operands
[2] = target
;
6170 for (i
= 0; i
< n_elts
; ++i
)
6172 x
= XVECEXP (vals
, 0, i
);
6173 if (!(CONST_INT_P (x
)
6174 || GET_CODE (x
) == CONST_DOUBLE
6175 || GET_CODE (x
) == CONST_FIXED
))
6177 if (!register_operand (x
, GET_MODE (x
)))
6178 x
= force_reg (GET_MODE (x
), x
);
6179 insert_operands
[1] = x
;
6180 insert_operands
[3] = GEN_INT (i
);
6181 spu_builtin_insert (insert_operands
);
6187 /* Return insn index for the vector compare instruction for given CODE,
6188 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
6191 get_vec_cmp_insn (enum rtx_code code
,
6192 enum machine_mode dest_mode
,
6193 enum machine_mode op_mode
)
6199 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6200 return CODE_FOR_ceq_v16qi
;
6201 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6202 return CODE_FOR_ceq_v8hi
;
6203 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6204 return CODE_FOR_ceq_v4si
;
6205 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6206 return CODE_FOR_ceq_v4sf
;
6207 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6208 return CODE_FOR_ceq_v2df
;
6211 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6212 return CODE_FOR_cgt_v16qi
;
6213 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6214 return CODE_FOR_cgt_v8hi
;
6215 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6216 return CODE_FOR_cgt_v4si
;
6217 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
6218 return CODE_FOR_cgt_v4sf
;
6219 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
6220 return CODE_FOR_cgt_v2df
;
6223 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
6224 return CODE_FOR_clgt_v16qi
;
6225 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
6226 return CODE_FOR_clgt_v8hi
;
6227 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
6228 return CODE_FOR_clgt_v4si
;
6236 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6237 DMODE is expected destination mode. This is a recursive function. */
6240 spu_emit_vector_compare (enum rtx_code rcode
,
6242 enum machine_mode dmode
)
6246 enum machine_mode dest_mode
;
6247 enum machine_mode op_mode
= GET_MODE (op1
);
6249 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
6251 /* Floating point vector compare instructions uses destination V4SImode.
6252 Double floating point vector compare instructions uses destination V2DImode.
6253 Move destination to appropriate mode later. */
6254 if (dmode
== V4SFmode
)
6255 dest_mode
= V4SImode
;
6256 else if (dmode
== V2DFmode
)
6257 dest_mode
= V2DImode
;
6261 mask
= gen_reg_rtx (dest_mode
);
6262 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6264 if (vec_cmp_insn
== -1)
6266 bool swap_operands
= false;
6267 bool try_again
= false;
6272 swap_operands
= true;
6277 swap_operands
= true;
6281 /* Treat A != B as ~(A==B). */
6283 enum insn_code nor_code
;
6284 rtx eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6285 nor_code
= optab_handler (one_cmpl_optab
, (int)dest_mode
)->insn_code
;
6286 gcc_assert (nor_code
!= CODE_FOR_nothing
);
6287 emit_insn (GEN_FCN (nor_code
) (mask
, eq_rtx
));
6288 if (dmode
!= dest_mode
)
6290 rtx temp
= gen_reg_rtx (dest_mode
);
6291 convert_move (temp
, mask
, 0);
6301 /* Try GT/GTU/LT/LTU OR EQ */
6304 enum insn_code ior_code
;
6305 enum rtx_code new_code
;
6309 case GE
: new_code
= GT
; break;
6310 case GEU
: new_code
= GTU
; break;
6311 case LE
: new_code
= LT
; break;
6312 case LEU
: new_code
= LTU
; break;
6317 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
6318 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
6320 ior_code
= optab_handler (ior_optab
, (int)dest_mode
)->insn_code
;
6321 gcc_assert (ior_code
!= CODE_FOR_nothing
);
6322 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
6323 if (dmode
!= dest_mode
)
6325 rtx temp
= gen_reg_rtx (dest_mode
);
6326 convert_move (temp
, mask
, 0);
6336 /* You only get two chances. */
6338 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
6340 gcc_assert (vec_cmp_insn
!= -1);
6351 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
6352 if (dmode
!= dest_mode
)
6354 rtx temp
= gen_reg_rtx (dest_mode
);
6355 convert_move (temp
, mask
, 0);
6362 /* Emit vector conditional expression.
6363 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6364 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6367 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
6368 rtx cond
, rtx cc_op0
, rtx cc_op1
)
6370 enum machine_mode dest_mode
= GET_MODE (dest
);
6371 enum rtx_code rcode
= GET_CODE (cond
);
6374 /* Get the vector mask for the given relational operations. */
6375 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
6377 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
/* Force OP into a form usable in mode MODE.
   A CONST_INT destined for a scalar integer MODE, or a BLKmode operand,
   is converted with convert_to_mode and forced into a MODE register.
   Other operands are first forced into a register in their own mode; when
   that mode has the same size as MODE the register is reinterpreted
   through simplify_gen_subreg, and a fresh MODE register filled by
   gen_spu_convert is used as well.
   NOTE(review): presumably the subreg and gen_spu_convert paths are
   alternatives, the conversion covering differing sizes -- confirm. */
6383 spu_force_reg (enum machine_mode mode
, rtx op
)
6386 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
6388 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
6389 || GET_MODE (op
) == BLKmode
)
6390 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
/* Everything else goes into a register in its own mode first. */
6394 r
= force_reg (GET_MODE (op
), op
);
6395 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
6397 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
6402 x
= gen_reg_rtx (mode
);
6403 emit_insn (gen_spu_convert (x
, r
));
/* Diagnose a bad argument OP of the builtin described by D, where P is
   the SPU_BTI_* index of the parameter's type.  For immediate types
   (SPU_BTI_7 through SPU_BTI_U18) an error is reported when OP is not a
   constant or when its value lies outside the matching entry of
   spu_builtin_range.  A warning is issued when low-order bits of an
   address operand would be ignored. */
6408 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
6410 HOST_WIDE_INT v
= 0;
6412 /* Check the range of immediate operands. */
6413 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
/* RANGE indexes spu_builtin_range relative to the first immediate type,
   SPU_BTI_7. */
6415 int range
= p
- SPU_BTI_7
;
6417 if (!CONSTANT_P (op
))
6418 error ("%s expects an integer literal in the range [%d, %d].",
6420 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
/* For a CONST wrapping a PLUS or MINUS, take the value to check from the
   second operand of the sum and continue with its first operand as OP. */
6422 if (GET_CODE (op
) == CONST
6423 && (GET_CODE (XEXP (op
, 0)) == PLUS
6424 || GET_CODE (XEXP (op
, 0)) == MINUS
))
6426 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
6427 op
= XEXP (XEXP (op
, 0), 0);
6429 else if (GET_CODE (op
) == CONST_INT
)
6431 else if (GET_CODE (op
) == CONST_VECTOR
6432 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
6433 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
6435 /* The default for v is 0 which is valid in every range. */
6436 if (v
< spu_builtin_range
[range
].low
6437 || v
> spu_builtin_range
[range
].high
)
6438 error ("%s expects an integer literal in the range [%d, %d]. ("
6439 HOST_WIDE_INT_PRINT_DEC
")",
6441 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
6450 /* This is only used in lqa, and stqa. Even though the insns
6451 encode 16 bits of the address (all but the 2 least
6452 significant), only 14 bits are used because it is masked to
6453 be 16 byte aligned. */
6457 /* This is used for lqr and stqr. */
/* Labels, function symbols, and values with any of the low LSBITS bits
   set all trigger the warning below. */
6464 if (GET_CODE (op
) == LABEL_REF
6465 || (GET_CODE (op
) == SYMBOL_REF
6466 && SYMBOL_REF_FUNCTION_P (op
))
6467 || (v
& ((1 << lsbits
) - 1)) != 0)
6468 warning (0, "%d least significant bits of %s are ignored.", lsbits
,
/* Expand the arguments of the builtin call EXP into rtl and store them in
   OPS, one per operand of the insn D->icode.  The loop walks the call
   arguments with A and the insn operands with I until all
   insn_data[icode].n_operands operands are filled.
   NOTE(review): presumably TARGET occupies the first OPS slot when the
   builtin returns a value (D->parm[0] != SPU_BTI_VOID) -- confirm. */
6475 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
6476 rtx target
, rtx ops
[])
6478 enum insn_code icode
= d
->icode
;
6481 /* Expand the arguments into rtl. */
6483 if (d
->parm
[0] != SPU_BTI_VOID
)
6486 for (a
= 0; i
< insn_data
[icode
].n_operands
; i
++, a
++)
6488 tree arg
= CALL_EXPR_ARG (exp
, a
);
6491 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, 0);
/* Expand the call EXP to the builtin described by D into rtl, preferring
   TARGET for the result.  The arguments are expanded into ops[], the
   result operand (operand 0) is set up when the builtin is non-void, each
   remaining operand is range checked and adjusted to satisfy the insn's
   predicate, and the insn is then generated and emitted according to
   D->type.  The result is brought to the mode of the builtin's prototype
   return type when the two differ. */
6496 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
6497 tree exp
, rtx target
)
6501 enum insn_code icode
= d
->icode
;
6502 enum machine_mode mode
, tmode
;
6506 /* Set up ops[] with values from arglist. */
6507 expand_builtin_args (d
, exp
, target
, ops
);
6509 /* Handle the target operand which must be operand 0. */
6511 if (d
->parm
[0] != SPU_BTI_VOID
)
6514 /* We prefer the mode specified for the match_operand otherwise
6515 use the mode from the builtin function prototype. */
6516 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
6517 if (tmode
== VOIDmode
)
6518 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
6520 /* Try to use target because not using it can lead to extra copies
6521 and when we are using all of the registers extra copies leads
6523 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
6526 target
= ops
[0] = gen_reg_rtx (tmode
);
6528 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
6534 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
6536 enum machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
6541 arg
= CALL_EXPR_ARG (exp
, 0);
6542 gcc_assert (TREE_CODE (TREE_TYPE (arg
)) == POINTER_TYPE
);
6543 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
6544 addr
= memory_address (mode
, op
);
6547 op
= gen_reg_rtx (GET_MODE (addr
));
6548 emit_insn (gen_rtx_SET (VOIDmode
, op
,
6549 gen_rtx_NEG (GET_MODE (addr
), addr
)));
6550 op
= gen_rtx_MEM (mode
, op
);
6552 pat
= GEN_FCN (icode
) (target
, op
);
6559 /* Ignore align_hint, but still expand its args in case they have
6561 if (icode
== CODE_FOR_spu_align_hint
)
6564 /* Handle the rest of the operands. */
6565 for (p
= 1; i
< insn_data
[icode
].n_operands
; i
++, p
++)
/* P walks the prototype entries in D->parm while I walks the insn
   operands.
   NOTE(review): the fallback below indexes D->parm with I whereas the
   range check further down uses P -- confirm the two always agree. */
6567 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
6568 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
6570 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
6572 /* mode can be VOIDmode here for labels */
6574 /* For specific intrinsics with an immediate operand, e.g.,
6575 si_ai(), we sometimes need to convert the scalar argument to a
6576 vector argument by splatting the scalar. */
6577 if (VECTOR_MODE_P (mode
)
6578 && (GET_CODE (ops
[i
]) == CONST_INT
6579 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
6580 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
6582 if (GET_CODE (ops
[i
]) == CONST_INT
)
6583 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
6586 rtx reg
= gen_reg_rtx (mode
);
6587 enum machine_mode imode
= GET_MODE_INNER (mode
);
6588 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
6589 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
6590 if (imode
!= GET_MODE (ops
[i
]))
6591 ops
[i
] = convert_to_mode (imode
, ops
[i
],
6592 TYPE_UNSIGNED (spu_builtin_types
6594 emit_insn (gen_spu_splats (reg
, ops
[i
]));
/* Range and alignment diagnostics for the operand. */
6599 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
/* Operands rejected by the insn's predicate are forced into a form
   acceptable for MODE. */
6601 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
6602 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
/* Dispatch on the operand count so the generator is called with the
   matching number of arguments. */
6605 switch (insn_data
[icode
].n_operands
)
6608 pat
= GEN_FCN (icode
) (0);
6611 pat
= GEN_FCN (icode
) (ops
[0]);
6614 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
6617 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
6620 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
6623 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
6626 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
/* Emit the pattern with the routine matching the builtin's kind: B_CALL
   and B_BISLED as call insns, B_JUMP as a jump insn. */
6635 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
6636 emit_call_insn (pat
);
6637 else if (d
->type
== B_JUMP
)
6639 emit_jump_insn (pat
);
6645 return_type
= spu_builtin_types
[d
->parm
[0]];
6646 if (d
->parm
[0] != SPU_BTI_VOID
6647 && GET_MODE (target
) != TYPE_MODE (return_type
))
6649 /* target is the return value. It should always be the mode of
6650 the builtin function prototype. */
6651 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
/* Expand the builtin call EXP.  The builtin is identified by the function
   code of the called decl, rebased by END_BUILTINS so that it indexes
   spu_builtins; codes below NUM_SPU_BUILTINS are handed to
   spu_expand_builtin_1.  SUBTARGET, MODE and IGNORE are unused. */
6658 spu_expand_builtin (tree exp
,
6660 rtx subtarget ATTRIBUTE_UNUSED
,
6661 enum machine_mode mode ATTRIBUTE_UNUSED
,
6662 int ignore ATTRIBUTE_UNUSED
)
6664 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
6665 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
) - END_BUILTINS
;
6666 struct spu_builtin_description
*d
;
/* FCODE is unsigned, so this single comparison also rejects codes that
   were below END_BUILTINS before the subtraction. */
6668 if (fcode
< NUM_SPU_BUILTINS
)
6670 d
= &spu_builtins
[fcode
];
6672 return spu_expand_builtin_1 (d
, exp
, target
);
6677 /* Implement targetm.vectorize.builtin_mul_widen_even.
Dispatches on TYPE_MODE (TYPE) and returns the fndecl of an SPU builtin:
SPU_MULE_0 when TYPE is unsigned, SPU_MULE_1 otherwise. */
6679 spu_builtin_mul_widen_even (tree type
)
6681 switch (TYPE_MODE (type
))
6684 if (TYPE_UNSIGNED (type
))
6685 return spu_builtins
[SPU_MULE_0
].fndecl
;
6687 return spu_builtins
[SPU_MULE_1
].fndecl
;
6694 /* Implement targetm.vectorize.builtin_mul_widen_odd.
Dispatches on TYPE_MODE (TYPE) and returns the fndecl of an SPU builtin:
SPU_MULO_1 when TYPE is unsigned, SPU_MULO_0 otherwise.
NOTE(review): the _0/_1 order is the reverse of the one used in
spu_builtin_mul_widen_even; presumably this mirrors the ordering of the
overloads in the builtin table -- confirm. */
6696 spu_builtin_mul_widen_odd (tree type
)
6698 switch (TYPE_MODE (type
))
6701 if (TYPE_UNSIGNED (type
))
6702 return spu_builtins
[SPU_MULO_1
].fndecl
;
6704 return spu_builtins
[SPU_MULO_0
].fndecl
;
6711 /* Implement targetm.vectorize.builtin_mask_for_load.
Looks up the SPU_MASK_FOR_LOAD entry of spu_builtins.
NOTE(review): presumably that entry's fndecl is the value returned --
confirm. */
6713 spu_builtin_mask_for_load (void)
6715 struct spu_builtin_description
*d
= &spu_builtins
[SPU_MASK_FOR_LOAD
];
6720 /* Implement targetm.vectorize.builtin_vectorization_cost.
NOTE(review): RUNTIME_TEST presumably tells whether a runtime test guards
the vectorized version -- confirm against the hook's documentation. */
6722 spu_builtin_vectorization_cost (bool runtime_test
)
6724 /* If the branch of the runtime test is taken, i.e. the vectorized
6725 version is skipped, this incurs a misprediction cost (because the
6726 vectorized version is expected to be the fall-through). So we subtract
6727 the latency of a mispredicted branch from the costs that are incurred
6728 when the vectorized version is executed. */
6735 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6736 after applying N number of iterations. This routine does not determine
6737 how many iterations are required to reach the desired alignment.
TYPE is marked unused, leaving IS_PACKED as the only input.
NOTE(review): presumably packed data is reported as unable to reach
alignment -- confirm. */
6740 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
6745 /* All other types are naturally aligned. */
/* Return the machine mode of a pointer into address space ADDRSPACE.
   The mode returned below depends on spu_ea_model: DImode when it is 64,
   ptr_mode otherwise.
   NOTE(review): presumably this return applies to the non-generic
   address space (ADDRSPACE == 1) -- confirm. */
6749 static enum machine_mode
6750 spu_ea_pointer_mode (int addrspace
)
6757 return (spu_ea_model
== 64 ? DImode
: ptr_mode
);
/* Return nonzero when MODE is acceptable as a pointer mode: ptr_mode,
   Pmode, or the pointer mode that spu_ea_pointer_mode reports for address
   space 1. */
6764 spu_valid_pointer_mode (enum machine_mode mode
)
6766 return (mode
== ptr_mode
|| mode
== Pmode
|| mode
== spu_ea_pointer_mode (1));
6769 /* Count the total number of instructions in each pipe and return the
6770 maximum, which is used as the Minimum Iteration Interval (MII)
6771 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
6772 -2 are instructions that can go in pipe0 or pipe1.
G is the dependence graph whose nodes are scanned. The counts are kept
in T, indexed by get_pipe () + 2. */
6774 spu_sms_res_mii (struct ddg
*g
)
6777 unsigned t
[4] = {0, 0, 0, 0};
6779 for (i
= 0; i
< g
->num_nodes
; i
++)
6781 rtx insn
= g
->nodes
[i
].insn
;
/* Bias get_pipe's -2..1 result into a 0..3 index: 0 is "either pipe",
   1 is get_pipe () == -1, 2 and 3 are get_pipe () == 0 and 1. */
6782 int p
= get_pipe (insn
) + 2;
6788 if (dump_file
&& INSN_P (insn
))
6789 fprintf (dump_file
, "i%d %s %d %d\n",
6791 insn_data
[INSN_CODE(insn
)].name
,
6795 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
/* The bound is the larger of two values: half of T[0] + T[2] + T[3]
   rounded up, and the larger of T[2] and T[3].  T[1] does not
   contribute. */
6797 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
/* Expander initialization for the SPU.  Records an alignment of 8 for the
   hard frame pointer register and creates the two SImode pseudos that
   directly follow LAST_VIRTUAL_REGISTER, marking both as pointers with
   alignment 128. */
6802 spu_init_expanders (void)
6807 /* HARD_FRAME_REGISTER is only 128 bit aligned when
6808 frame_pointer_needed is true. We don't know that until we're
6809 expanding the prologue. */
6810 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
6812 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6813 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want to
6814 handle those cases specially, so we reserve those two registers
6815 here by generating them. */
6816 r0
= gen_reg_rtx (SImode
);
6817 r1
= gen_reg_rtx (SImode
);
6818 mark_reg_pointer (r0
, 128);
6819 mark_reg_pointer (r1
, 128);
/* The reservation only works if these really are the first two pseudos
   generated. */
6820 gcc_assert (REGNO (r0
) == LAST_VIRTUAL_REGISTER
+ 1
6821 && REGNO (r1
) == LAST_VIRTUAL_REGISTER
+ 2);
/* Return the machine mode used for the result of libgcc comparison
   routines.
   NOTE(review): presumably SImode, per the comment in the body --
   confirm. */
6825 static enum machine_mode
6826 spu_libgcc_cmp_return_mode (void)
6829 /* For SPU, word mode is TImode, so it is better to use SImode
6830 for compare returns. */
/* Return the machine mode used for shift counts passed to libgcc shift
   routines.
   NOTE(review): presumably SImode, per the comment in the body --
   confirm. */
6834 static enum machine_mode
6835 spu_libgcc_shift_count_mode (void)
6837 /* For SPU, word mode is TImode, so it is better to use SImode
6838 for shift counts. */
6842 /* An early place to adjust some flags after GCC has finished processing
6845 asm_file_start (void)
6847 /* Variable tracking should be run after all optimizations which
6848 change order of insns. It also needs a valid CFG. */
/* Remember the requested setting in spu_flag_var_tracking and clear the
   generic flag.
   NOTE(review): presumably the back end consults the saved copy to run
   variable tracking itself at a later point -- confirm. */
6849 spu_flag_var_tracking
= flag_var_tracking
;
6850 flag_var_tracking
= 0;
/* Then do whatever the default file-start hook does. */
6852 default_file_start ();
6855 /* Implement targetm.section_type_flags.
Sections named ".toe" and "._ea" are special-cased by NAME; every other
section gets the result of default_section_type_flags for DECL, NAME
and RELOC. */
6857 spu_section_type_flags (tree decl
, const char *name
, int reloc
)
6859 /* .toe needs to have type @nobits. */
6860 if (strcmp (name
, ".toe") == 0)
/* "._ea" is reported as a writable section that also carries
   SECTION_DEBUG. */
6862 if (strcmp (name
, "._ea") == 0)
6863 return SECTION_WRITE
| SECTION_DEBUG
;
6864 return default_section_type_flags (decl
, name
, reloc
);
/* Return the name recorded in spu_address_spaces for address space
   ADDRSPACE.  The assertion admits only ADDRSPACE == 1. */
6868 spu_addr_space_name (int addrspace
)
6870 gcc_assert (addrspace
> 0 && addrspace
<= 1);
6871 return (spu_address_spaces
[addrspace
].name
);
/* Return the insn generator (a function taking two rtx arguments and
   returning an rtx) that converts a pointer from address space FROM to
   address space TO.  Only conversions between space 0 and space 1 are
   accepted; both generators come from entry 1 of spu_address_spaces.
   NOTE(review): presumably to_generic_insn serves 1 -> 0 and
   from_generic_insn serves 0 -> 1 -- confirm. */
6875 rtx (* spu_addr_space_conversion_rtl (int from
, int to
)) (rtx
, rtx
)
6877 gcc_assert ((from
== 0 && to
== 1) || (from
== 1 && to
== 0));
6880 return spu_address_spaces
[1].to_generic_insn
;
6882 return spu_address_spaces
[1].from_generic_insn
;
/* Tell whether the identifier VALUE names a known address space.  The
   spu_address_spaces table is scanned up to its first entry with a null
   name, comparing each name with IDENTIFIER_POINTER (VALUE).
   NOTE(review): presumably a match yields true and falling off the end
   yields false -- confirm. */
6888 bool spu_valid_addr_space (const_tree value
)
6894 for (i
= 0; spu_address_spaces
[i
].name
; i
++)
6895 if (strcmp (IDENTIFIER_POINTER (value
), spu_address_spaces
[i
].name
) == 0)
/* Map the identifier IDENT to an address space number.  The
   spu_address_spaces table is scanned up to its first entry with a null
   name, comparing each name with IDENTIFIER_POINTER (IDENT).
   NOTE(review): presumably the index of the matching entry is the number
   returned -- confirm. */
6901 unsigned char spu_addr_space_number (tree ident
)
6907 for (i
= 0; spu_address_spaces
[i
].name
; i
++)
6908 if (strcmp (IDENTIFIER_POINTER (ident
), spu_address_spaces
[i
].name
) == 0)
6914 /* Generate a constant or register which contains 2^SCALE. We assume
6915 the result is valid for MODE. Currently, MODE must be V4SFmode and
6916 SCALE must be SImode or a CONST_INT.
A non-constant SCALE produces a register computed at run time; a
CONST_INT SCALE produces a vector constant built byte by byte. */
6918 spu_gen_exp2 (enum machine_mode mode
, rtx scale
)
6920 gcc_assert (mode
== V4SFmode
);
6921 gcc_assert (GET_MODE (scale
) == SImode
|| GET_CODE (scale
) == CONST_INT
);
6922 if (GET_CODE (scale
) != CONST_INT
)
6924 /* unsigned int exp = (127 + scale) << 23;
6925 __vector float m = (__vector float) spu_splats (exp); */
6926 rtx reg
= force_reg (SImode
, scale
);
6927 rtx exp
= gen_reg_rtx (SImode
);
6928 rtx mul
= gen_reg_rtx (mode
);
6929 emit_insn (gen_addsi3 (exp
, reg
, GEN_INT (127)));
6930 emit_insn (gen_ashlsi3 (exp
, exp
, GEN_INT (23)));
/* The SUBREG views the SImode bit pattern in MODE's element mode so that
   it can be splatted across the vector. */
6931 emit_insn (gen_spu_splats (mul
, gen_rtx_SUBREG (GET_MODE_INNER (mode
), exp
, 0)));
/* Constant SCALE: build the same (127 + SCALE) << 23 pattern by hand,
   most significant byte first.  Each 4-byte element gets the upper bits
   of the biased exponent in its first byte, the exponent's lowest bit at
   the top of its second byte, and zeros in the remaining two bytes. */
6936 HOST_WIDE_INT exp
= 127 + INTVAL (scale
);
6937 unsigned char arr
[16];
6938 arr
[0] = arr
[4] = arr
[8] = arr
[12] = exp
>> 1;
6939 arr
[1] = arr
[5] = arr
[9] = arr
[13] = exp
<< 7;
6940 arr
[2] = arr
[6] = arr
[10] = arr
[14] = 0;
6941 arr
[3] = arr
[7] = arr
[11] = arr
[15] = 0;
6942 return array_to_constant (mode
, arr
);