/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
#include "coretypes.h"
#include "hard-reg-set.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-attr.h"
#include "basic-block.h"
#include "integrate.h"
#include "target-def.h"
#include "langhooks.h"
#include "cfglayout.h"
#include "sched-int.h"
#include "tree-gimple.h"
#include "tm-constrs.h"
#include "spu-builtins.h"
/* Builtin types, data and prototypes.  */
struct spu_builtin_range
{
  int low, high;
};

static struct spu_builtin_range spu_builtin_range[] = {
  {-0x40ll, 0x7fll},		/* SPU_BTI_7     */
  {-0x40ll, 0x3fll},		/* SPU_BTI_S7    */
  {0ll, 0x7fll},		/* SPU_BTI_U7    */
  {-0x200ll, 0x1ffll},		/* SPU_BTI_S10   */
  {-0x2000ll, 0x1fffll},	/* SPU_BTI_S10_4 */
  {0ll, 0x3fffll},		/* SPU_BTI_U14   */
  {-0x8000ll, 0xffffll},	/* SPU_BTI_16    */
  {-0x8000ll, 0x7fffll},	/* SPU_BTI_S16   */
  {-0x20000ll, 0x1ffffll},	/* SPU_BTI_S16_2 */
  {0ll, 0xffffll},		/* SPU_BTI_U16   */
  {0ll, 0x3ffffll},		/* SPU_BTI_U16_2 */
  {0ll, 0x3ffffll},		/* SPU_BTI_U18   */
};
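
/* Each entry above is the inclusive [low, high] range that a builtin
   immediate operand must fall in; the SPU_BTI_* index selects the row
   positionally.  E.g. SPU_BTI_S10 accepts -0x200..0x1ff, i.e. any
   10-bit signed value.  */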
/* Target specific attribute specifications.  */
char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
/* Prototypes and external defs.  */
static void spu_init_builtins (void);
static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
static rtx get_pic_reg (void);
static int need_to_save_reg (int regno, int saving);
static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
			       rtx scratch);
static void emit_nop_for_insn (rtx insn);
static bool insn_clobbers_hbr (rtx insn);
static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
				  int distance);
static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
				    enum machine_mode dmode);
static rtx get_branch_target (rtx branch);
static void insert_branch_hints (void);
static void insert_nops (void);
static void spu_machine_dependent_reorg (void);
static int spu_sched_issue_rate (void);
static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
				     int can_issue_more);
static int get_pipe (rtx insn);
static int spu_sched_adjust_priority (rtx insn, int pri);
static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
					 int flags,
					 unsigned char *no_add_attrs);
static int spu_naked_function_p (tree func);
static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum,
					    enum machine_mode mode,
					    const_tree type,
					    unsigned char named);
static tree spu_build_builtin_va_list (void);
static void spu_va_start (tree, rtx);
static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
				      tree * post_p);
static int regno_aligned_for_load (int regno);
static int store_with_one_insn_p (rtx mem);
static int reg_align (rtx reg);
static int mem_is_padded_component_ref (rtx x);
static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
static void spu_asm_globalize_label (FILE * file, const char *name);
static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
				    int *total);
static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
static void spu_init_libfuncs (void);
static bool spu_return_in_memory (const_tree type, const_tree fntype);
static void fix_range (const char *);
static void spu_encode_section_info (tree, rtx, int);
static tree spu_builtin_mul_widen_even (tree);
static tree spu_builtin_mul_widen_odd (tree);
static tree spu_builtin_mask_for_load (void);
static int spu_builtin_vectorization_cost (bool);
static bool spu_vector_alignment_reachable (const_tree, bool);
static int spu_sms_res_mii (struct ddg *g);

extern const char *reg_names[];
rtx spu_compare_op0, spu_compare_op1;
/* Which instruction set architecture to use.  */
int spu_arch;

/* Which cpu are we tuning for.  */
int spu_tune;

enum immediate_class
{
  IC_POOL,			/* constant pool */
  IC_IL1,			/* one il* instruction */
  IC_IL2,			/* both ilhu and iohl instructions */
  IC_IL1s,			/* one il* instruction */
  IC_IL2s,			/* both ilhu and iohl instructions */
  IC_FSMBI,			/* the fsmbi instruction */
  IC_CPAT,			/* one of the c*d instructions */
  IC_FSMBI2			/* fsmbi plus 1 other instruction */
};
static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
static int cpat_info (unsigned char *arr, int size, int *prun, int *pstart);
static enum immediate_class classify_immediate (rtx op,
						enum machine_mode mode);

static enum machine_mode spu_libgcc_cmp_return_mode (void);

static enum machine_mode spu_libgcc_shift_count_mode (void);

/* Built in types.  */
tree spu_builtin_types[SPU_BTI_MAX];
/* TARGET overrides.  */

#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS spu_init_builtins

#undef TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN spu_expand_builtin

#undef TARGET_EH_RETURN_FILTER_MODE
#define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode

/* The .8byte directive doesn't seem to work well for a 32 bit
   architecture.  */
#undef TARGET_ASM_UNALIGNED_DI_OP
#define TARGET_ASM_UNALIGNED_DI_OP NULL

#undef TARGET_RTX_COSTS
#define TARGET_RTX_COSTS spu_rtx_costs

#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_0

#undef TARGET_SCHED_ISSUE_RATE
#define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate

#undef TARGET_SCHED_VARIABLE_ISSUE
#define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue

#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority

#undef TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost

const struct attribute_spec spu_attribute_table[];
#undef TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE spu_attribute_table

#undef TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER spu_assemble_integer

#undef TARGET_SCALAR_MODE_SUPPORTED_P
#define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p

#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p

#undef TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall

#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label

#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE spu_pass_by_reference

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size

#undef TARGET_BUILD_BUILTIN_VA_LIST
#define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list

#undef TARGET_EXPAND_BUILTIN_VA_START
#define TARGET_EXPAND_BUILTIN_VA_START spu_va_start

#undef TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs

#undef TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg

#undef TARGET_GIMPLIFY_VA_ARG_EXPR
#define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr

#undef TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS spu_init_libfuncs

#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY spu_return_in_memory

#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO spu_encode_section_info

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even

#undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
#define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd

#undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
#define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load

#undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
#define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost

#undef TARGET_VECTOR_ALIGNMENT_REACHABLE
#define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable

#undef TARGET_LIBGCC_CMP_RETURN_MODE
#define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode

#undef TARGET_LIBGCC_SHIFT_COUNT_MODE
#define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode

#undef TARGET_SCHED_SMS_RES_MII
#define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii

struct gcc_target targetm = TARGET_INITIALIZER;
void
spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
{
  /* Override some of the default param values.  With so many registers
     larger values are better for these params.  */
  MAX_PENDING_LIST_LENGTH = 128;

  /* With so many registers this is better on by default.  */
  flag_rename_registers = 1;
}
/* Sometimes certain combinations of command options do not make sense
   on a particular target machine.  You can define a macro
   OVERRIDE_OPTIONS to take account of this.  This macro, if defined, is
   executed once just after all the command options have been parsed.  */
void
spu_override_options (void)
{
  /* Small loops will be unpeeled at -O3.  For SPU it is more important
     to keep code small by default.  */
  if (!flag_unroll_loops && !flag_peel_loops
      && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
    PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;

  flag_omit_frame_pointer = 1;

  if (align_functions < 8)
    align_functions = 8;

  if (spu_fixed_range_string)
    fix_range (spu_fixed_range_string);

  /* Determine processor architectural level.  */
  if (spu_arch_string)
    {
      if (strcmp (&spu_arch_string[0], "cell") == 0)
	spu_arch = PROCESSOR_CELL;
      else if (strcmp (&spu_arch_string[0], "celledp") == 0)
	spu_arch = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_arch_string[0]);
    }

  /* Determine processor to tune for.  */
  if (spu_tune_string)
    {
      if (strcmp (&spu_tune_string[0], "cell") == 0)
	spu_tune = PROCESSOR_CELL;
      else if (strcmp (&spu_tune_string[0], "celledp") == 0)
	spu_tune = PROCESSOR_CELLEDP;
      else
	error ("Unknown architecture '%s'", &spu_tune_string[0]);
    }
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */

/* Table of machine attributes.  */
const struct attribute_spec spu_attribute_table[] =
{
  /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
  { "naked",      0, 0, true,  false, false, spu_handle_fndecl_attribute },
  { "spu_vector", 0, 0, false, true,  false, spu_handle_vector_attribute },
  { NULL,         0, 0, false, false, false, NULL }
};
/* True if MODE is valid for the target.  By "valid", we mean able to
   be manipulated in non-trivial ways.  In particular, this means all
   the arithmetic is supported.  */
static unsigned char
spu_scalar_mode_supported_p (enum machine_mode mode)

/* Similarly for vector modes.  "Supported" here is less strict.  At
   least some operations are supported; need to check optabs or builtins
   for further details.  */
static unsigned char
spu_vector_mode_supported_p (enum machine_mode mode)
/* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
   least significant bytes of the outer mode.  This function returns
   TRUE for the SUBREGs where this is correct.  */
int
valid_subreg (rtx op)
{
  enum machine_mode om = GET_MODE (op);
  enum machine_mode im = GET_MODE (SUBREG_REG (op));
  return om != VOIDmode && im != VOIDmode
    && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
	|| (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
}
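
/* For example, (subreg:SI (reg:HI) 0) is accepted because both modes
   are 4 bytes or smaller, while (subreg:SI (reg:DI) 0) is rejected:
   the sizes differ and the inner mode is wider than 4 bytes.  */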
/* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
   and adjust the start offset.  */
static rtx
adjust_operand (rtx op, HOST_WIDE_INT * start)
{
  enum machine_mode mode;
  int op_size;

  /* Strip any SUBREG.  */
  if (GET_CODE (op) == SUBREG)
    {
      if (start)
	*start -=
	  GET_MODE_BITSIZE (GET_MODE (op)) -
	  GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
      op = SUBREG_REG (op);
    }

  /* If it is smaller than SI, assure a SUBREG.  */
  op_size = GET_MODE_BITSIZE (GET_MODE (op));
  if (op_size < 32)
    {
      if (start)
	*start += 32 - op_size;
      op_size = 32;
    }

  /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG.  */
  mode = mode_for_size (op_size, MODE_INT, 0);
  if (mode != GET_MODE (op))
    op = gen_rtx_SUBREG (mode, op, 0);
  return op;
}
void
spu_expand_extv (rtx ops[], int unsignedp)
{
  HOST_WIDE_INT width = INTVAL (ops[2]);
  HOST_WIDE_INT start = INTVAL (ops[3]);
  HOST_WIDE_INT src_size, dst_size;
  enum machine_mode src_mode, dst_mode;
  rtx dst = ops[0], src = ops[1];
  rtx s;

  dst = adjust_operand (ops[0], 0);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  src = adjust_operand (src, &start);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  /* Shift the field up to the most significant position, using the
     shift insn that matches the source mode.  */
  s = gen_reg_rtx (src_mode);
  if (src_mode == SImode)
    emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
  else if (src_mode == DImode)
    emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
  else
    emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
  src = s;

  if (width < src_size)
    {
      rtx pat;
      int icode;
      if (src_mode == SImode)
	icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
      else if (src_mode == DImode)
	icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
      else
	icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;

      /* Shift back down, zero- or sign-extending as requested.  */
      s = gen_reg_rtx (src_mode);
      pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
      emit_insn (pat);
      src = s;
    }

  convert_move (dst, src, unsignedp);
}
void
spu_expand_insv (rtx ops[])
{
  HOST_WIDE_INT width = INTVAL (ops[1]);
  HOST_WIDE_INT start = INTVAL (ops[2]);
  HOST_WIDE_INT maskbits;
  enum machine_mode dst_mode, src_mode;
  rtx dst = ops[0], src = ops[3];
  int dst_size, src_size;
  rtx mask;
  rtx shift_reg;
  int shift;

  if (GET_CODE (ops[0]) == MEM)
    dst = gen_reg_rtx (TImode);
  else
    dst = adjust_operand (dst, &start);
  dst_mode = GET_MODE (dst);
  dst_size = GET_MODE_BITSIZE (GET_MODE (dst));

  if (CONSTANT_P (src))
    {
      enum machine_mode m =
	(width <= 32 ? SImode : width <= 64 ? DImode : TImode);
      src = force_reg (m, convert_to_mode (m, src, 0));
    }
  src = adjust_operand (src, 0);
  src_mode = GET_MODE (src);
  src_size = GET_MODE_BITSIZE (GET_MODE (src));

  mask = gen_reg_rtx (dst_mode);
  shift_reg = gen_reg_rtx (dst_mode);
  shift = dst_size - start - width;

  /* It's not safe to use subreg here because the compiler assumes
     that the SUBREG_REG is right justified in the SUBREG.  */
  convert_move (shift_reg, src, 1);

  if (shift > 0)
    {
      if (dst_mode == SImode)
	emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
      else if (dst_mode == DImode)
	emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
      else
	emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
    }

  switch (dst_size)
    {
    case 32:
      maskbits = (-1ll << (32 - width - start));
      if (start)
	maskbits += (1ll << (32 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
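      /* Worked example: for width 8, start 8, -1ll << 16 gives
         0x...ffff0000, and adding 1ll << 24 clears the bits above the
         field, leaving maskbits == 0x00ff0000 -- ones in exactly the
         eight destination bits.  */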
      break;
    case 64:
      maskbits = (-1ll << (64 - width - start));
      if (start)
	maskbits += (1ll << (64 - start));
      emit_move_insn (mask, GEN_INT (maskbits));
      break;
    case 128:
      {
	unsigned char arr[16];
	int i = start / 8;
	memset (arr, 0, sizeof (arr));
	arr[i] = 0xff >> (start & 7);
	for (i++; i <= (start + width - 1) / 8; i++)
	  arr[i] = 0xff;
	arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
	emit_move_insn (mask, array_to_constant (TImode, arr));
      }
      break;
    }

  if (GET_CODE (ops[0]) == MEM)
    {
      rtx aligned = gen_reg_rtx (SImode);
      rtx low = gen_reg_rtx (SImode);
      rtx addr = gen_reg_rtx (SImode);
      rtx rotl = gen_reg_rtx (SImode);
      rtx mask0 = gen_reg_rtx (TImode);
      rtx mem;

      emit_move_insn (addr, XEXP (ops[0], 0));
      emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
      emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
      emit_insn (gen_negsi2 (rotl, low));
      emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
      emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
      mem = change_address (ops[0], TImode, aligned);
      set_mem_alias_set (mem, 0);
      emit_move_insn (dst, mem);
      emit_insn (gen_selb (dst, dst, shift_reg, mask0));
      emit_move_insn (mem, dst);
      if (start + width > MEM_ALIGN (ops[0]))
	{
	  rtx shl = gen_reg_rtx (SImode);
	  rtx mask1 = gen_reg_rtx (TImode);
	  rtx dst1 = gen_reg_rtx (TImode);
	  rtx mem1;

	  emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
	  emit_insn (gen_shlqby_ti (mask1, mask, shl));
	  mem1 = adjust_address (mem, TImode, 16);
	  set_mem_alias_set (mem1, 0);
	  emit_move_insn (dst1, mem1);
	  emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
	  emit_move_insn (mem1, dst1);
	}
    }
  else
    emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
}
int
spu_expand_block_move (rtx ops[])
{
  HOST_WIDE_INT bytes, align, offset;
  rtx src, dst, sreg, dreg, target;
  int i;

  if (GET_CODE (ops[2]) != CONST_INT
      || GET_CODE (ops[3]) != CONST_INT
      || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
    return 0;

  bytes = INTVAL (ops[2]);
  align = INTVAL (ops[3]);

  if (align != 16)
    return 0;

  /* Copy as many whole quadwords as possible.  */
  for (offset = 0; offset + 16 <= bytes; offset += 16)
    {
      dst = adjust_address (ops[0], V16QImode, offset);
      src = adjust_address (ops[1], V16QImode, offset);
      emit_move_insn (dst, src);
    }

  /* Merge any remaining bytes into the destination with selb, using a
     mask that has 0xff in the byte slots to take from the source.  */
  if (bytes > offset)
    {
      rtx mask;
      unsigned char arr[16] = { 0 };
      for (i = 0; i < bytes - offset; i++)
	arr[i] = 0xff;
      dst = adjust_address (ops[0], V16QImode, offset);
      src = adjust_address (ops[1], V16QImode, offset);
      mask = gen_reg_rtx (V16QImode);
      sreg = gen_reg_rtx (V16QImode);
      dreg = gen_reg_rtx (V16QImode);
      target = gen_reg_rtx (V16QImode);
      emit_move_insn (mask, array_to_constant (V16QImode, arr));
      emit_move_insn (dreg, dst);
      emit_move_insn (sreg, src);
      emit_insn (gen_selb (target, dreg, sreg, mask));
      emit_move_insn (dst, target);
    }
  return 1;
}
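
/* E.g., a 20-byte copy emits one V16QI move for bytes 0-15, then for
   the 4-byte tail loads both quadwords, selb-merges the first 4 bytes
   of the source quadword into the destination quadword, and stores the
   result back.  */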
enum spu_comp_code
{ SPU_EQ, SPU_GT, SPU_GTU };

int spu_comp_icode[12][3] = {
  {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
  {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
  {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
  {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
  {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
  {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
  {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
  {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
  {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
  {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
  {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
  {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
};
/* Generate a compare for CODE.  Return a brand-new rtx that represents
   the result of the compare.  GCC can figure this out too if we don't
   provide all variations of compares, but since GCC always wants to use
   WORD_MODE, we can generate better code in most cases if we do it
   ourselves.  */
void
spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
{
  int reverse_compare = 0;
  int reverse_test = 0;
  rtx compare_result, eq_result;
  rtx comp_rtx, eq_rtx;
  rtx target = operands[0];
  enum machine_mode comp_mode;
  enum machine_mode op_mode;
  enum spu_comp_code scode, eq_code, ior_code;
  int index;

  /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
     and so on, to keep the constant in operand 1.  */
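  /* For example, (x >= 16) becomes (x > 15), and unsigned (x < 16)
     becomes (x <= 15), so the cgt/clgt family of instructions can be
     used with the adjusted constant.  */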
  if (GET_CODE (spu_compare_op1) == CONST_INT)
    {
      HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
      if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
	switch (code)
	  {
	  case GE:
	    spu_compare_op1 = GEN_INT (val);
	    code = GT;
	    break;
	  case LT:
	    spu_compare_op1 = GEN_INT (val);
	    code = LE;
	    break;
	  case GEU:
	    spu_compare_op1 = GEN_INT (val);
	    code = GTU;
	    break;
	  case LTU:
	    spu_compare_op1 = GEN_INT (val);
	    code = LEU;
	    break;
	  default:
	    break;
	  }
    }

  op_mode = GET_MODE (spu_compare_op0);

  if (HONOR_NANS (op_mode))

  if (HONOR_NANS (op_mode))

  comp_mode = V4SImode;

  comp_mode = V2DImode;

  if (GET_MODE (spu_compare_op1) == DFmode
      && (scode != SPU_GT && scode != SPU_EQ))
    abort ();

  if (is_set == 0 && spu_compare_op1 == const0_rtx
      && (GET_MODE (spu_compare_op0) == SImode
	  || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
    {
      /* Don't need to set a register with the result when we are
         comparing against zero and branching.  */
      reverse_test = !reverse_test;
      compare_result = spu_compare_op0;
    }
  else
    {
      compare_result = gen_reg_rtx (comp_mode);

      if (reverse_compare)
	{
	  rtx t = spu_compare_op1;
	  spu_compare_op1 = spu_compare_op0;
	  spu_compare_op0 = t;
	}

      if (spu_comp_icode[index][scode] == 0)
	abort ();

      if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
	  (spu_compare_op0, op_mode))
	spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
      if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
	  (spu_compare_op1, op_mode))
	spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
      comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
							 spu_compare_op0,
							 spu_compare_op1);
      emit_insn (comp_rtx);

      eq_result = gen_reg_rtx (comp_mode);
      eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
							 spu_compare_op0,
							 spu_compare_op1);
      ior_code = ior_optab->handlers[(int) comp_mode].insn_code;
      gcc_assert (ior_code != CODE_FOR_nothing);
      emit_insn (GEN_FCN (ior_code)
		 (compare_result, compare_result, eq_result));
    }

  if (is_set == 0)
    {
      rtx bcomp;
      rtx loc_ref;

      /* We don't have branch on QI compare insns, so we convert the
         QI compare result to a HI result.  */
      if (comp_mode == QImode)
	{
	  rtx old_res = compare_result;
	  compare_result = gen_reg_rtx (HImode);
	  comp_mode = HImode;
	  emit_insn (gen_extendqihi2 (compare_result, old_res));
	}

      if (reverse_test)
	bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
      else
	bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);

      loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
      emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
				   gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
							 loc_ref, pc_rtx)));
    }
  else if (is_set == 2)
    {
      int compare_size = GET_MODE_BITSIZE (comp_mode);
      int target_size = GET_MODE_BITSIZE (GET_MODE (target));
      enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
      rtx select_mask;
      rtx op_t = operands[2];
      rtx op_f = operands[3];

      /* The result of the comparison can be SI, HI or QI mode.  Create a
         mask based on that result.  */
      if (target_size > compare_size)
	{
	  select_mask = gen_reg_rtx (mode);
	  emit_insn (gen_extend_compare (select_mask, compare_result));
	}
      else if (target_size < compare_size)
	select_mask =
	  gen_rtx_SUBREG (mode, compare_result,
			  (compare_size - target_size) / BITS_PER_UNIT);
      else if (comp_mode != mode)
	select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
      else
	select_mask = compare_result;

      if (GET_MODE (target) != GET_MODE (op_t)
	  || GET_MODE (target) != GET_MODE (op_f))
	abort ();

      if (reverse_test)
	emit_insn (gen_selb (target, op_t, op_f, select_mask));
      else
	emit_insn (gen_selb (target, op_f, op_t, select_mask));
    }
  else
    {
      if (reverse_test)
	emit_insn (gen_rtx_SET (VOIDmode, compare_result,
				gen_rtx_NOT (comp_mode, compare_result)));
      if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
	emit_insn (gen_extendhisi2 (target, compare_result));
      else if (GET_MODE (target) == SImode
	       && GET_MODE (compare_result) == QImode)
	emit_insn (gen_extend_compare (target, compare_result));
      else
	emit_move_insn (target, compare_result);
    }
}
HOST_WIDE_INT
const_double_to_hwint (rtx x)
{
  HOST_WIDE_INT val;
  REAL_VALUE_TYPE rv;

  if (GET_MODE (x) == SFmode)
    {
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_SINGLE (rv, val);
    }
  else if (GET_MODE (x) == DFmode)
    {
      long l[2];
      REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
      REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
      val = l[0];
      val = (val << 32) | (l[1] & 0xffffffff);
    }
  else
    abort ();
  return val;
}

rtx
hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
{
  long tv[2];
  REAL_VALUE_TYPE rv;

  gcc_assert (mode == SFmode || mode == DFmode);

  if (mode == SFmode)
    tv[0] = (v << 32) >> 32;
  else if (mode == DFmode)
    {
      tv[1] = (v << 32) >> 32;
      tv[0] = v >> 32;
    }
  real_from_target (&rv, tv, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
}
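
/* As a check: hwint_to_const_double (SFmode, 0x3f800000) rebuilds the
   CONST_DOUBLE for 1.0f from its IEEE-754 bit pattern; the
   (v << 32) >> 32 shift pair sign-extends the low 32 bits into the
   form real_from_target expects.  */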
void
print_operand_address (FILE * file, register rtx addr)
{
  rtx reg;
  rtx offset;

  if (GET_CODE (addr) == AND
      && GET_CODE (XEXP (addr, 1)) == CONST_INT
      && INTVAL (XEXP (addr, 1)) == -16)
    addr = XEXP (addr, 0);
  switch (GET_CODE (addr))
    {
    case REG:
      fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
      break;

    case PLUS:
      reg = XEXP (addr, 0);
      offset = XEXP (addr, 1);
      if (GET_CODE (offset) == REG)
	{
	  fprintf (file, "%s,%s", reg_names[REGNO (reg)],
		   reg_names[REGNO (offset)]);
	}
      else if (GET_CODE (offset) == CONST_INT)
	{
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
		   INTVAL (offset), reg_names[REGNO (reg)]);
	}
      break;

    case CONST:
    case LABEL_REF:
    case SYMBOL_REF:
    case CONST_INT:
      output_addr_const (file, addr);
      break;
    }
}
void
print_operand (FILE * file, rtx x, int code)
{
  enum machine_mode mode = GET_MODE (x);
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int xcode = GET_CODE (x);
  int i, info;

  if (GET_MODE (x) == VOIDmode)
    switch (code)
      {
      case 'L':			/* 128 bits, signed */
      case 'm':			/* 128 bits, signed */
      case 'T':			/* 128 bits, signed */
      case 't':			/* 128 bits, signed */
	mode = TImode;
	break;
      case 'K':			/* 64 bits, signed */
      case 'k':			/* 64 bits, signed */
      case 'D':			/* 64 bits, signed */
      case 'd':			/* 64 bits, signed */
	mode = DImode;
	break;
      case 'J':			/* 32 bits, signed */
      case 'j':			/* 32 bits, signed */
      case 's':			/* 32 bits, signed */
      case 'S':			/* 32 bits, signed */
	mode = SImode;
	break;
      }

  switch (code)
    {
    case 'j':			/* 32 bits, signed */
    case 'k':			/* 64 bits, signed */
    case 'm':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORHI:
	      fprintf (file, "h");
	      break;
	    case SPU_ORBI:
	      fprintf (file, "b");
	      break;
	    default:
	      break;
	    }
	}
      return;

    case 'J':			/* 32 bits, signed */
    case 'K':			/* 64 bits, signed */
    case 'L':			/* 128 bits, signed */
      if (xcode == CONST_INT
	  || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
	{
	  gcc_assert (logical_immediate_p (x, mode)
		      || iohl_immediate_p (x, mode));
	  constant_to_array (mode, x, arr);
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);
	  switch (which_logical_immediate (val))
	    {
	    case SPU_ORHI:
	      val = trunc_int_for_mode (val, HImode);
	      break;
	    case SPU_ORBI:
	      val = trunc_int_for_mode (val, QImode);
	      break;
	    default:
	      break;
	    }
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	}
      return;

    case 't':			/* 128 bits, signed */
    case 'd':			/* 64 bits, signed */
    case 's':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_ILA:
		fprintf (file, "a");
		break;
	      case SPU_ILH:
		fprintf (file, "h");
		break;
	      case SPU_ILHU:
		fprintf (file, "hu");
		break;
	      default:
		break;
	      }
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
	    if (info == 1)
	      fprintf (file, "b");
	    else if (info == 2)
	      fprintf (file, "h");
	    else if (info == 4)
	      fprintf (file, "w");
	    else if (info == 8)
	      fprintf (file, "d");
	    break;
	  case IC_IL1s:
	    if (xcode == CONST_VECTOR)
	      {
		x = CONST_VECTOR_ELT (x, 0);
		xcode = GET_CODE (x);
	      }
	    if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
	      fprintf (file, "a");
	    else if (xcode == HIGH)
	      fprintf (file, "hu");
	    break;
	  default:
	    break;
	  }
      }
      return;

    case 'T':			/* 128 bits, signed */
    case 'D':			/* 64 bits, signed */
    case 'S':			/* 32 bits, signed */
      {
	enum immediate_class c = classify_immediate (x, mode);
	switch (c)
	  {
	  case IC_IL1:
	    constant_to_array (mode, x, arr);
	    val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	    val = trunc_int_for_mode (val, SImode);
	    switch (which_immediate_load (val))
	      {
	      case SPU_ILH:
	      case SPU_ILHU:
		val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
		break;
	      default:
		break;
	      }
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
	    break;
	  case IC_FSMBI:
	    constant_to_array (mode, x, arr);
	    val = 0;
	    for (i = 0; i < 16; i++)
	      {
		val <<= 1;
		val |= arr[i] & 1;
	      }
	    print_operand (file, GEN_INT (val), 0);
	    break;
	  case IC_CPAT:
	    constant_to_array (mode, x, arr);
	    cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
	    fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT) info);
	    break;
	  case IC_IL1s:
	    if (GET_CODE (x) == CONST_VECTOR)
	      x = CONST_VECTOR_ELT (x, 0);
	    output_addr_const (file, x);
	    if (xcode == HIGH)
	      fprintf (file, "@h");
	    break;
	  default:
	    break;
	  }
      }
      return;

      if (xcode == CONST_INT)
	{
	  /* Only the 4 least significant bits are relevant for
	     generating control word instructions.  */
	  fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
	}
      return;

    case 'M':			/* print code for c*d */
      if (GET_CODE (x) == CONST_INT)
	switch (INTVAL (x))
	  {
	  case 1:
	    fprintf (file, "b");
	    break;
	  case 2:
	    fprintf (file, "h");
	    break;
	  case 4:
	    fprintf (file, "w");
	    break;
	  case 8:
	    fprintf (file, "d");
	    break;
	  }
      return;

    case 'N':			/* Negate the operand */
      if (xcode == CONST_INT)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
      else if (xcode == CONST_VECTOR)
	fprintf (file, HOST_WIDE_INT_PRINT_DEC,
		 -INTVAL (CONST_VECTOR_ELT (x, 0)));
      return;

    case 'I':			/* enable/disable interrupts */
      if (xcode == CONST_INT)
	fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
      return;

    case 'b':			/* branch modifiers */
      if (xcode == REG)
	fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
      else if (COMPARISON_P (x))
	fprintf (file, "%s", xcode == NE ? "n" : "");
      return;

    case 'i':			/* indirect call */
      if (xcode == MEM)
	{
	  if (GET_CODE (XEXP (x, 0)) == REG)
	    /* Used in indirect function calls.  */
	    fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
	  else
	    output_address (XEXP (x, 0));
	}
      return;

    case 'p':			/* load/store */
      if (xcode == MEM)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == AND)
	{
	  x = XEXP (x, 0);
	  xcode = GET_CODE (x);
	}
      if (xcode == REG)
	fprintf (file, "d");
      else if (xcode == CONST_INT)
	fprintf (file, "a");
      else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
	fprintf (file, "r");
      else if (xcode == PLUS || xcode == LO_SUM)
	{
	  if (GET_CODE (XEXP (x, 1)) == REG)
	    fprintf (file, "x");
	  else
	    fprintf (file, "d");
	}
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      output_addr_const (file, GEN_INT (val));
      return;

      val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
      val = -(val & -8ll);
      val = (val >> 3) & 0x1f;
      output_addr_const (file, GEN_INT (val));
      return;

      if (xcode == REG)
	fprintf (file, "%s", reg_names[REGNO (x)]);
      else if (xcode == MEM)
	output_address (XEXP (x, 0));
      else if (xcode == CONST_VECTOR)
	print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
      else
	output_addr_const (file, x);
      return;

    default:
      output_operand_lossage ("invalid %%xn code");
    }
}
extern char call_used_regs[];

/* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
   caller saved register.  For leaf functions it is more efficient to
   use a volatile register because we won't need to save and restore the
   pic register.  This routine is only valid after register allocation
   is completed, so we can pick an unused register.  */
static rtx
get_pic_reg (void)
{
  rtx pic_reg = pic_offset_table_rtx;
  if (!reload_completed && !reload_in_progress)
    abort ();
  return pic_reg;
}
/* Split constant addresses to handle cases that are too large.
   Add in the pic register when in PIC mode.
   Split immediates that require more than 1 instruction.  */
int
spu_split_immediate (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum immediate_class c = classify_immediate (ops[1], mode);

  switch (c)
    {
    case IC_IL2:
      {
	unsigned char arrhi[16];
	unsigned char arrlo[16];
	rtx to, hi, lo;
	int i;
	constant_to_array (mode, ops[1], arrhi);
	to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
	for (i = 0; i < 16; i += 4)
	  {
	    arrlo[i + 2] = arrhi[i + 2];
	    arrlo[i + 3] = arrhi[i + 3];
	    arrlo[i + 0] = arrlo[i + 1] = 0;
	    arrhi[i + 2] = arrhi[i + 3] = 0;
	  }
	hi = array_to_constant (mode, arrhi);
	lo = array_to_constant (mode, arrlo);
	emit_move_insn (to, hi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
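	/* E.g., SImode 0x12345678 becomes "ilhu rt,0x1234" (from
	   arrhi) followed by "iohl rt,0x5678" (the IOR with arrlo),
	   matching the IC_IL2 "both ilhu and iohl" classification.  */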
	return 1;
      }
    case IC_FSMBI2:
      {
	unsigned char arr_fsmbi[16];
	unsigned char arr_andbi[16];
	rtx to, reg_fsmbi, reg_and;
	int i;
	enum machine_mode imode = mode;
	/* We need to do reals as ints because the constant used in the
	   AND might not be a legitimate real constant.  */
	imode = int_mode_for_mode (mode);
	constant_to_array (mode, ops[1], arr_fsmbi);
	if (imode != mode)
	  to = simplify_gen_subreg (imode, ops[0], GET_MODE (ops[0]), 0);
	else
	  to = ops[0];
	for (i = 0; i < 16; i++)
	  if (arr_fsmbi[i] != 0)
	    {
	      arr_andbi[0] = arr_fsmbi[i];
	      arr_fsmbi[i] = 0xff;
	    }
	for (i = 1; i < 16; i++)
	  arr_andbi[i] = arr_andbi[0];
	reg_fsmbi = array_to_constant (imode, arr_fsmbi);
	reg_and = array_to_constant (imode, arr_andbi);
	emit_move_insn (to, reg_fsmbi);
	emit_insn (gen_rtx_SET
		   (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
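	/* The net effect: fsmbi materializes 0xff in every byte slot
	   where the constant is nonzero, and the AND with the splatted
	   byte value turns those 0xff bytes into the actual byte.  All
	   nonzero bytes map onto the single arr_andbi[0] value, so they
	   must be identical for this case to apply.  */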
	return 1;
      }
    case IC_POOL:
      if (reload_in_progress || reload_completed)
	{
	  rtx mem = force_const_mem (mode, ops[1]);
	  if (TARGET_LARGE_MEM)
	    {
	      rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
	      emit_move_insn (addr, XEXP (mem, 0));
	      mem = replace_equiv_address (mem, addr);
	    }
	  emit_move_insn (ops[0], mem);
	  return 1;
	}
      break;
    case IC_IL1s:
    case IC_IL2s:
      if (reload_completed && GET_CODE (ops[1]) != HIGH)
	{
	  if (c == IC_IL2s)
	    {
	      emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
	      emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
	    }
	  else if (flag_pic)
	    emit_insn (gen_pic (ops[0], ops[1]));
	  if (flag_pic)
	    {
	      rtx pic_reg = get_pic_reg ();
	      emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
	      crtl->uses_pic_offset_table = 1;
	    }
	  return flag_pic || c == IC_IL2s;
	}
      break;
    default:
      break;
    }
  return 0;
}
/* SAVING is TRUE when we are generating the actual load and store
   instructions for REGNO.  When determining the size of the stack
   needed for saving register we must allocate enough space for the
   worst case, because we don't always have the information early enough
   to not allocate it.  But we can at least eliminate the actual loads
   and stores during the prologue/epilogue.  */
static int
need_to_save_reg (int regno, int saving)
{
  if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
    return 1;
  if (flag_pic
      && regno == PIC_OFFSET_TABLE_REGNUM
      && (!saving || crtl->uses_pic_offset_table)
      && (!saving
	  || !current_function_is_leaf
	  || df_regs_ever_live_p (LAST_ARG_REGNUM)))
    return 1;
  return 0;
}

/* This function is only correct starting with local register
   allocation.  */
int
spu_saved_regs_size (void)
{
  int reg_save_size = 0;
  int regno;

  for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
    if (need_to_save_reg (regno, 0))
      reg_save_size += 0x10;
  return reg_save_size;
}
static rtx
frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (mem, reg));
}

static rtx
frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
{
  rtx reg = gen_rtx_REG (V4SImode, regno);
  rtx mem =
    gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
  return emit_insn (gen_movv4si (reg, mem));
}

/* This happens after reload, so we need to expand it.  */
static rtx
frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
{
  rtx insn;
  if (satisfies_constraint_K (GEN_INT (imm)))
    {
      insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
    }
  else
    {
      emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
      insn = emit_insn (gen_addsi3 (dst, src, scratch));
      if (REGNO (src) == REGNO (scratch))
	abort ();
    }
  return insn;
}
/* Return nonzero if this function is known to have a null epilogue.  */
int
direct_return (void)
{
  if (reload_completed)
    {
      if (cfun->static_chain_decl == 0
	  && (spu_saved_regs_size ()
	      + get_frame_size ()
	      + crtl->outgoing_args_size
	      + crtl->args.pretend_args_size == 0)
	  && current_function_is_leaf)
	return 1;
    }
  return 0;
}
/* The stack frame looks like this:

 prev SP | back chain  |
	 | reg save    | crtl->args.pretend_args_size bytes
	 | saved regs  | spu_saved_regs_size() bytes
      FP | vars        | get_frame_size() bytes
	 | args        | crtl->outgoing_args_size bytes
*/
void
spu_expand_prologue (void)
{
  HOST_WIDE_INT size = get_frame_size (), offset, regno;
  HOST_WIDE_INT total_size;
  HOST_WIDE_INT saved_regs_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx scratch_reg_0, scratch_reg_1;
  rtx insn, real;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (flag_pic && optimize == 0)
    crtl->uses_pic_offset_table = 1;

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
  scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  /* Save this first because code after this might use the link
     register as a scratch register.  */
  if (!current_function_is_leaf)
    {
      insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  offset = -crtl->args.pretend_args_size;
  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
    if (need_to_save_reg (regno, 1))
      {
	offset -= 16;
	insn = frame_emit_store (regno, sp_reg, offset);
	RTX_FRAME_RELATED_P (insn) = 1;
      }

  if (flag_pic && crtl->uses_pic_offset_table)
    {
      rtx pic_reg = get_pic_reg ();
      insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
      insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
    }

  if (total_size > 0)
    {
      if (flag_stack_check)
	{
	  /* We compare against total_size-1 because
	     ($sp >= total_size) <=> ($sp > total_size-1) */
	  rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
	  rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
	  rtx size_v4si = spu_const (V4SImode, total_size - 1);
	  if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
	    {
	      emit_move_insn (scratch_v4si, size_v4si);
	      size_v4si = scratch_v4si;
	    }
	  emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
	  emit_insn (gen_vec_extractv4si
		     (scratch_reg_0, scratch_v4si, GEN_INT (1)));
	  emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
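	  /* Word 1 of $sp holds the amount of available stack space in
	     the SPU ABI; heq halts the program when the extracted cgt
	     result is zero, i.e. when fewer than total_size bytes of
	     stack remain.  */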
	}

      /* Adjust the stack pointer, and make sure scratch_reg_0 contains
         the value of the previous $sp because we save it as the back
         chain.  */
      if (total_size <= 2000)
	{
	  /* In this case we save the back chain first.  */
	  insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
	}
      else if (satisfies_constraint_K (GEN_INT (-total_size)))
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
	}
      else
	{
	  insn = emit_move_insn (scratch_reg_0, sp_reg);
	  insn =
	    frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
	}
      RTX_FRAME_RELATED_P (insn) = 1;
      real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
      REG_NOTES (insn) =
	gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));

      if (total_size > 2000)
	{
	  /* Save the back chain ptr */
	  insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
	}

      if (frame_pointer_needed)
	{
	  rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
	  HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
	    + crtl->outgoing_args_size;
	  /* Set the new frame_pointer */
	  insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset,
				     scratch_reg_0);
	  RTX_FRAME_RELATED_P (insn) = 1;
	  real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
	  REG_NOTES (insn) =
	    gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
			       real, REG_NOTES (insn));
	  REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
	}
    }

  emit_note (NOTE_INSN_DELETED);
}
void
spu_expand_epilogue (bool sibcall_p)
{
  int size = get_frame_size (), offset, regno;
  HOST_WIDE_INT saved_regs_size, total_size;
  rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
  rtx jump, scratch_reg_0;

  /* A NOTE_INSN_DELETED is supposed to be at the start and end of
     the "toplevel" insn chain.  */
  emit_note (NOTE_INSN_DELETED);

  if (spu_naked_function_p (current_function_decl))
    return;

  scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);

  saved_regs_size = spu_saved_regs_size ();
  total_size = size + saved_regs_size
    + crtl->outgoing_args_size
    + crtl->args.pretend_args_size;

  if (!current_function_is_leaf
      || cfun->calls_alloca || total_size > 0)
    total_size += STACK_POINTER_OFFSET;

  if (total_size > 0)
    {
      if (cfun->calls_alloca)
	frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
      else
	frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);

      if (saved_regs_size > 0)
	{
	  offset = -crtl->args.pretend_args_size;
	  for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
	    if (need_to_save_reg (regno, 1))
	      {
		offset -= 0x10;
		frame_emit_load (regno, sp_reg, offset);
	      }
	}
    }

  if (!current_function_is_leaf)
    frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);

  if (!sibcall_p)
    {
      emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
      jump = emit_jump_insn (gen__return ());
      emit_barrier_after (jump);
    }

  emit_note (NOTE_INSN_DELETED);
}
rtx
spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
{
  if (count != 0)
    return 0;
  /* This is inefficient because it ends up copying to a save-register
     which then gets saved even though $lr has already been saved.  But
     it does generate better code for leaf functions and we don't need
     to use RETURN_ADDRESS_POINTER_REGNUM to get it working.  It's only
     used for __builtin_return_address anyway, so maybe we don't care if
     it's inefficient.  */
  return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
}
/* Given VAL, generate a constant appropriate for MODE.
   If MODE is a vector mode, every element will be VAL.
   For TImode, VAL will be zero extended to 128 bits.  */
rtx
spu_const (enum machine_mode mode, HOST_WIDE_INT val)
{
  rtx inner;
  rtvec v;
  int units, i;

  gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
	      || GET_MODE_CLASS (mode) == MODE_FLOAT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
	      || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);

  if (GET_MODE_CLASS (mode) == MODE_INT)
    return immed_double_const (val, 0, mode);

  /* val is the bit representation of the float */
  if (GET_MODE_CLASS (mode) == MODE_FLOAT)
    return hwint_to_const_double (mode, val);

  if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
    inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
  else
    inner = hwint_to_const_double (GET_MODE_INNER (mode), val);

  units = GET_MODE_NUNITS (mode);

  v = rtvec_alloc (units);

  for (i = 0; i < units; ++i)
    RTVEC_ELT (v, i) = inner;

  return gen_rtx_CONST_VECTOR (mode, v);
}
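
/* For example, spu_const (V4SImode, -1) yields
   (const_vector:V4SI [-1 -1 -1 -1]), and spu_const (V4SFmode,
   0x3f800000) a vector of four 1.0f elements, since VAL is taken as
   the float's bit pattern.  */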
/* Branch hint stuff.  */

/* The hardware requires 8 insns between a hint and the branch it
   affects.  This variable describes how many rtl instructions the
   compiler needs to see before inserting a hint.  (FIXME: We should
   accept less and insert nops to enforce it because hinting is always
   profitable for performance, but we do need to be careful of code
   size.)  */
int spu_hint_dist = (8 * 4);	/* 8 insns of 4 bytes each.  */
/* Create a MODE vector constant from 4 ints.  */
rtx
spu_const_from_ints (enum machine_mode mode, int a, int b, int c, int d)
{
  unsigned char arr[16];
  arr[0] = (a >> 24) & 0xff;
  arr[1] = (a >> 16) & 0xff;
  arr[2] = (a >> 8) & 0xff;
  arr[3] = (a >> 0) & 0xff;
  arr[4] = (b >> 24) & 0xff;
  arr[5] = (b >> 16) & 0xff;
  arr[6] = (b >> 8) & 0xff;
  arr[7] = (b >> 0) & 0xff;
  arr[8] = (c >> 24) & 0xff;
  arr[9] = (c >> 16) & 0xff;
  arr[10] = (c >> 8) & 0xff;
  arr[11] = (c >> 0) & 0xff;
  arr[12] = (d >> 24) & 0xff;
  arr[13] = (d >> 16) & 0xff;
  arr[14] = (d >> 8) & 0xff;
  arr[15] = (d >> 0) & 0xff;
  return array_to_constant (mode, arr);
}
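
/* E.g., spu_const_from_ints (V16QImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) produces bytes 00 01 ... 0f in order, since
   each int is unpacked most significant byte first.  */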
/* An array of these is used to propagate hints to predecessor blocks.  */
struct spu_bb_info
{
  rtx prop_jump;		/* propagated from another block */
  basic_block bb;		/* the original block. */
};

/* The special $hbr register is used to prevent the insn scheduler from
   moving hbr insns across instructions which invalidate them.  It
   should only be used in a clobber, and this function searches for
   insns which clobber it.  */
static bool
insn_clobbers_hbr (rtx insn)
{
  if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
    {
      rtx parallel = PATTERN (insn);
      rtx clobber;
      int j;
      for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
	{
	  clobber = XVECEXP (parallel, 0, j);
	  if (GET_CODE (clobber) == CLOBBER
	      && GET_CODE (XEXP (clobber, 0)) == REG
	      && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
	    return 1;
	}
    }
  return 0;
}
static void
spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
{
  rtx branch_label;
  rtx hint, insn, prev, next;

  if (before == 0 || branch == 0 || target == 0)
    return;

  branch_label = gen_label_rtx ();
  LABEL_NUSES (branch_label)++;
  LABEL_PRESERVE_P (branch_label) = 1;
  insn = emit_label_before (branch_label, branch);
  branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);

  /* If the previous insn is pipe0, make the hbr dual issue with it.  If
     the current insn is pipe0, dual issue with it.  */
  prev = prev_active_insn (before);
  if (prev && get_pipe (prev) == 0)
    hint = emit_insn_before (gen_hbr (branch_label, target), before);
  else if (get_pipe (before) == 0 && distance > spu_hint_dist)
    {
      next = next_active_insn (before);
      hint = emit_insn_after (gen_hbr (branch_label, target), before);
      if (next)
	PUT_MODE (next, TImode);
    }
  else
    {
      hint = emit_insn_before (gen_hbr (branch_label, target), before);
      PUT_MODE (hint, TImode);
    }
  recog_memoized (hint);
}
/* Returns 0 if we don't want a hint for this branch.  Otherwise return
   the rtx for the branch target.  */
static rtx
get_branch_target (rtx branch)
{
  if (GET_CODE (branch) == JUMP_INSN)
    {
      rtx set, src;

      /* Return statements */
      if (GET_CODE (PATTERN (branch)) == RETURN)
	return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);

      /* Jump tables */
      if (GET_CODE (PATTERN (branch)) == ADDR_VEC
	  || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
	return 0;

      set = single_set (branch);
      src = SET_SRC (set);
      if (GET_CODE (SET_DEST (set)) != PC)
	abort ();

      if (GET_CODE (src) == IF_THEN_ELSE)
	{
	  rtx lab = 0;
	  rtx note = find_reg_note (branch, REG_BR_PROB, 0);
	  if (note)
	    {
	      /* If the more probable case is not a fall through, then
	         try a branch hint.  */
	      HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
	      if (prob > (REG_BR_PROB_BASE * 6 / 10)
		  && GET_CODE (XEXP (src, 1)) != PC)
		lab = XEXP (src, 1);
	      else if (prob < (REG_BR_PROB_BASE * 4 / 10)
		       && GET_CODE (XEXP (src, 2)) != PC)
		lab = XEXP (src, 2);
	    }
	  if (lab)
	    {
	      if (GET_CODE (lab) == RETURN)
		return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
	      return lab;
	    }
	  return 0;
	}

      return src;
    }
  else if (GET_CODE (branch) == CALL_INSN)
    {
      rtx call;
      /* All of our call patterns are in a PARALLEL and the CALL is
         the first pattern in the PARALLEL.  */
      if (GET_CODE (PATTERN (branch)) != PARALLEL)
	abort ();
      call = XVECEXP (PATTERN (branch), 0, 0);
      if (GET_CODE (call) == SET)
	call = SET_SRC (call);
      if (GET_CODE (call) != CALL)
	abort ();
      return XEXP (XEXP (call, 0), 0);
    }
  return 0;
}
static void
insert_branch_hints (void)
{
  struct spu_bb_info *spu_bb_info;
  rtx branch, insn, next;
  rtx branch_target = 0;
  int branch_addr = 0, insn_addr, head_addr;
  basic_block bb;
  unsigned int j;

  spu_bb_info =
    (struct spu_bb_info *) xcalloc (last_basic_block + 1,
				    sizeof (struct spu_bb_info));

  /* We need exact insn addresses and lengths.  */
  shorten_branches (get_insns ());

  FOR_EACH_BB_REVERSE (bb)
  {
    head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
    branch = 0;
    if (spu_bb_info[bb->index].prop_jump)
      {
	branch = spu_bb_info[bb->index].prop_jump;
	branch_target = get_branch_target (branch);
	branch_addr = INSN_ADDRESSES (INSN_UID (branch));
      }

    /* Search from end of a block to beginning.  In this loop, find
       jumps which need a branch and emit them only when:
       - it's an indirect branch and we're at the insn which sets
       the register
       - we're at an insn that will invalidate the hint. e.g., a
       call, another hint insn, inline asm that clobbers $hbr, and
       some inlined operations (divmodsi4).  Don't consider jumps
       because they are only at the end of a block and are
       considered when we are deciding whether to propagate
       - we're getting too far away from the branch.  The hbr insns
       only have a signed 10-bit offset
       We go back as far as possible so the branch will be considered
       for propagation when we get to the beginning of the block.  */
    next = 0;
    for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
      {
	if (INSN_P (insn))
	  {
	    insn_addr = INSN_ADDRESSES (INSN_UID (insn));
	    if (branch
		&& ((GET_CODE (branch_target) == REG
		     && set_of (branch_target, insn) != NULL_RTX)
		    || insn_clobbers_hbr (insn)
		    || branch_addr - insn_addr > 600))
	      {
		int next_addr = INSN_ADDRESSES (INSN_UID (next));
		if (insn != BB_END (bb)
		    && branch_addr - next_addr >= spu_hint_dist)
		  {
		    if (dump_file)
		      fprintf (dump_file,
			       "hint for %i in block %i before %i\n",
			       INSN_UID (branch), bb->index,
			       INSN_UID (next));
		    spu_emit_branch_hint (next, branch, branch_target,
					  branch_addr - next_addr);
		  }
		branch = 0;
	      }

	    /* JUMP_P will only be true at the end of a block.  When
	       branch is already set it means we've previously decided
	       to propagate a hint for that branch into this block.  */
	    if (CALL_P (insn) || (JUMP_P (insn) && !branch))
	      {
		branch = 0;
		if ((branch_target = get_branch_target (insn)))
		  {
		    branch = insn;
		    branch_addr = insn_addr;
		  }
	      }

	    /* When a branch hint is emitted it will be inserted
	       before "next".  Make sure next is the beginning of a
	       cycle to minimize impact on the scheduled insns.  */
	    if (GET_MODE (insn) == TImode)
	      next = insn;
	  }
	if (insn == BB_HEAD (bb))
	  break;
      }

    if (branch)
      {
	/* If we haven't emitted a hint for this branch yet, it might
	   be profitable to emit it in one of the predecessor blocks,
	   especially for loops.  */
	rtx bbend;
	basic_block prev = 0, prop = 0, prev2 = 0;
	int loop_exit = 0, simple_loop = 0;
	int next_addr = 0;
	if (next)
	  next_addr = INSN_ADDRESSES (INSN_UID (next));

	for (j = 0; j < EDGE_COUNT (bb->preds); j++)
	  if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
	    prev = EDGE_PRED (bb, j)->src;
	  else
	    prev2 = EDGE_PRED (bb, j)->src;

	for (j = 0; j < EDGE_COUNT (bb->succs); j++)
	  if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
	    loop_exit = 1;
	  else if (EDGE_SUCC (bb, j)->dest == bb)
	    simple_loop = 1;

	/* If this branch is a loop exit then propagate to previous
	   fallthru block.  This catches the cases when it is a simple
	   loop or when there is an initial branch into the loop.  */
	if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
	  prop = prev;

	/* If there is only one adjacent predecessor, don't propagate
	   outside this loop.  This loop_depth test isn't perfect, but
	   I'm not sure the loop_father member is valid at this point.  */
	else if (prev && single_pred_p (bb)
		 && prev->loop_depth == bb->loop_depth)
	  prop = prev;

	/* If this is the JOIN block of a simple IF-THEN then
	   propagate the hint to the HEADER block.  */
	else if (prev && prev2
		 && EDGE_COUNT (bb->preds) == 2
		 && EDGE_COUNT (prev->preds) == 1
		 && EDGE_PRED (prev, 0)->src == prev2
		 && prev2->loop_depth == bb->loop_depth
		 && GET_CODE (branch_target) != REG)
	  prop = prev;

	/* Don't propagate when:
	   - this is a simple loop and the hint would be too far
	   - this is not a simple loop and there are 16 insns in
	   this block already
	   - the predecessor block ends in a branch that will be
	   hinted
	   - the predecessor block ends in an insn that invalidates
	   the hint.  */
	if (prop
	    && (bbend = BB_END (prop))
	    && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
	    (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
	    && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
	  {
	    if (dump_file)
	      fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
		       "for %i (loop_exit %i simple_loop %i dist %i)\n",
		       bb->index, prop->index, bb->loop_depth,
		       INSN_UID (branch), loop_exit, simple_loop,
		       branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));

	    spu_bb_info[prop->index].prop_jump = branch;
	    spu_bb_info[prop->index].bb = bb;
	  }
	else if (next && branch_addr - next_addr >= spu_hint_dist)
	  {
	    if (dump_file)
	      fprintf (dump_file, "hint for %i in block %i before %i\n",
		       INSN_UID (branch), bb->index, INSN_UID (next));
	    spu_emit_branch_hint (next, branch, branch_target,
				  branch_addr - next_addr);
	  }
      }
  }
  free (spu_bb_info);
}
/* Emit a nop for INSN such that the two will dual issue.  This assumes
   INSN is 8-byte aligned.  When INSN is inline asm we emit an lnop.
   We check for TImode to handle a MULTI1 insn which has dual issued its
   first instruction.  get_pipe returns -1 for MULTI0, inline asm, or
   asms.  */
static void
emit_nop_for_insn (rtx insn)
{
  int p;
  rtx new_insn;
  p = get_pipe (insn);
  if (p == 1 && GET_MODE (insn) == TImode)
    {
      new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
      PUT_MODE (new_insn, TImode);
      PUT_MODE (insn, VOIDmode);
    }
  else
    new_insn = emit_insn_after (gen_lnop (), insn);
}

/* Insert nops in basic blocks to meet dual issue alignment
   requirements.  */
static void
insert_nops (void)
{
  rtx insn, next_insn, prev_insn;
  int length;
  int addr;

  /* This sets up INSN_ADDRESSES.  */
  shorten_branches (get_insns ());

  /* Keep track of length added by nops.  */
  length = 0;

  prev_insn = 0;
  for (insn = get_insns (); insn; insn = next_insn)
    {
      next_insn = next_active_insn (insn);
      addr = INSN_ADDRESSES (INSN_UID (insn));
      if (GET_MODE (insn) == TImode
	  && next_insn
	  && GET_MODE (next_insn) != TImode
	  && ((addr + length) & 7) != 0)
	{
	  /* prev_insn will always be set because the first insn is
	     always 8-byte aligned.  */
	  emit_nop_for_insn (prev_insn);
	  length += 4;
	}
      prev_insn = insn;
    }
}

static void
spu_machine_dependent_reorg (void)
{
  if (TARGET_BRANCH_HINTS)
    insert_branch_hints ();
  insert_nops ();
}
/* Insn scheduling routines, primarily for dual issue.  */
static int
spu_sched_issue_rate (void)
{
  return 2;
}

static int
spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
			  int verbose ATTRIBUTE_UNUSED, rtx insn,
			  int can_issue_more)
{
  if (GET_CODE (PATTERN (insn)) != USE
      && GET_CODE (PATTERN (insn)) != CLOBBER
      && get_pipe (insn) != -2)
    can_issue_more--;
  return can_issue_more;
}

static int
get_pipe (rtx insn)
{
  enum attr_type t;

  /* Handle inline asm */
  if (INSN_CODE (insn) == -1)
    return -1;
  t = get_attr_type (insn);
  switch (t)
    {
    case TYPE_IPREFETCH:
      return -2;
    }
}

static int
spu_sched_adjust_priority (rtx insn, int pri)
{
  int p = get_pipe (insn);
  /* Schedule UNSPEC_CONVERT's early so they have less effect on
     scheduling.  */
  if (GET_CODE (PATTERN (insn)) == USE
      || GET_CODE (PATTERN (insn)) == CLOBBER
      || p == -2)
    return pri + 100;
  /* Schedule pipe0 insns early for greedier dual issue.  */
  if (p != 1)
    return pri + 50;
  return pri;
}

/* INSN is dependent on DEP_INSN.  */
static int
spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
		       rtx dep_insn ATTRIBUTE_UNUSED, int cost)
{
  if (GET_CODE (insn) == CALL_INSN)
    return cost - 2;
  /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
     scheduler makes every insn in a block anti-dependent on the final
     jump_insn.  We adjust here so higher cost insns will get scheduled
     earlier.  */
  if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
    return insn_cost (dep_insn) - 3;
  return cost;
}
/* Create a CONST_DOUBLE from a string.  */
struct rtx_def *
spu_float_const (const char *string, enum machine_mode mode)
{
  REAL_VALUE_TYPE value;
  value = REAL_VALUE_ATOF (string, mode);
  return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
}

int
spu_constant_address_p (rtx x)
{
  return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
	  || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
	  || GET_CODE (x) == HIGH);
}
static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x8000 && val <= 0x7fff)
    return SPU_IL;
  if (val >= 0 && val <= 0x3ffff)
    return SPU_ILA;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    return SPU_ILH;
  if ((val & 0xffff) == 0)
    return SPU_ILHU;

  return SPU_NONE;
}
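
/* Illustrative mapping for the checks above (a sketch, not from the
   original source; rt stands for an arbitrary register):
     0x00001234 -> SPU_IL    "il   rt, 0x1234"   (fits 16-bit signed)
     0x00021234 -> SPU_ILA   "ila  rt, 0x21234"  (fits 18-bit unsigned)
     0x12341234 -> SPU_ILH   "ilh  rt, 0x1234"   (halfword replicated)
     0x12340000 -> SPU_ILHU  "ilhu rt, 0x1234"   (low halfword zero)  */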
/* Return true when OP can be loaded by one of the il instructions, or
   when flow2 is not completed and OP can be loaded using ilhu and iohl.  */
int
immediate_load_p (rtx op, enum machine_mode mode)
{
  if (CONSTANT_P (op))
    {
      enum immediate_class c = classify_immediate (op, mode);
      return c == IC_IL1 || c == IC_IL1s
	|| (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
    }
  return 0;
}
/* Return true if the first SIZE bytes of arr is a constant that can be
   generated with cbd, chd, cwd or cdd.  When non-NULL, PRUN and PSTART
   represent the size and offset of the instruction to use.  */
static int
cpat_info (unsigned char *arr, int size, int *prun, int *pstart)
{
  int cpat, run, i, start;
  cpat = 1;
  run = 0;
  start = -1;
  for (i = 0; i < size && cpat; i++)
    if (arr[i] != i + 16)
      {
	if (!run)
	  {
	    start = i;
	    if (arr[i] == 3)
	      run = 1;
	    else if (arr[i] == 2 && arr[i + 1] == 3)
	      run = 2;
	    else if (arr[i] == 0)
	      {
		while (arr[i + run] == run && i + run < 16)
		  run++;
		if (run != 4 && run != 8)
		  cpat = 0;
	      }
	    else
	      cpat = 0;
	    if ((i & (run - 1)) != 0)
	      cpat = 0;
	    i += run;
	  }
	else
	  cpat = 0;
      }
  if (cpat && (run || size < 16))
    {
      if (run == 0)
	run = 1;
      if (prun)
	*prun = run;
      if (pstart)
	*pstart = start == -1 ? 16 - run : start;
      return 1;
    }
  return 0;
}
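
/* Example (illustrative, assuming the generate-controls encoding used by
   cbd/chd/cwd/cdd): a cwd pattern for a word slot at offset 8 is
     { 16,17,18,19, 20,21,22,23, 0,1,2,3, 28,29,30,31 }
   i.e. the identity bytes i+16 everywhere except an aligned run 0..3
   marking the insertion slot; these are exactly the arrays accepted
   above.  */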
/* OP is a CONSTANT_P.  Determine what instructions can be used to load
   it into a register.  MODE is only valid when OP is a CONST_INT.  */
static enum immediate_class
classify_immediate (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j, repeated, fsmbi, repeat;

  gcc_assert (CONSTANT_P (op));

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  /* A V4SI const_vector with all identical symbols is ok.  */
  if (!flag_pic
      && mode == V4SImode
      && GET_CODE (op) == CONST_VECTOR
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
      && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
      && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
      && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
      && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
    op = CONST_VECTOR_ELT (op, 0);

  switch (GET_CODE (op))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;

    case CONST:
      /* We can never know if the resulting address fits in 18 bits and can be
	 loaded with ila.  For now, assume the address will not overflow if
	 the displacement is "small" (fits 'K' constraint).  */
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
	{
	  rtx sym = XEXP (XEXP (op, 0), 0);
	  rtx cst = XEXP (XEXP (op, 0), 1);

	  if (GET_CODE (sym) == SYMBOL_REF
	      && GET_CODE (cst) == CONST_INT
	      && satisfies_constraint_K (cst))
	    return IC_IL1s;
	}
      return IC_IL2s;

    case HIGH:
      return IC_IL1s;

    case CONST_VECTOR:
      for (i = 0; i < GET_MODE_NUNITS (mode); i++)
	if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
	    && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
	  return IC_POOL;
      /* Fall through.  */

    case CONST_INT:
    case CONST_DOUBLE:
      constant_to_array (mode, op, arr);

      /* Check that each 4-byte slot is identical.  */
      repeated = 1;
      for (i = 4; i < 16; i += 4)
	for (j = 0; j < 4; j++)
	  if (arr[j] != arr[i + j])
	    repeated = 0;

      if (repeated)
	{
	  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
	  val = trunc_int_for_mode (val, SImode);

	  if (which_immediate_load (val) != SPU_NONE)
	    return IC_IL1;
	}

      /* Any mode of 2 bytes or smaller can be loaded with an il
	 instruction.  */
      gcc_assert (GET_MODE_SIZE (mode) > 2);

      fsmbi = 1;
      repeat = 0;
      for (i = 0; i < 16 && fsmbi; i++)
	if (arr[i] != 0 && repeat == 0)
	  repeat = arr[i];
	else if (arr[i] != 0 && arr[i] != repeat)
	  fsmbi = 0;
      if (fsmbi)
	return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;

      if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
	return IC_CPAT;
      /* ... */
      return IC_POOL;

    default:
      gcc_unreachable ();
    }
}
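
/* A note on the fsmbi classification above (illustrative): fsmbi builds a
   16-byte mask from a 16-bit immediate, one bit per byte, so a constant
   whose bytes are only 0x00 and 0xff is IC_FSMBI.  When the nonzero bytes
   are all some other single value, the constant is IC_FSMBI2 and needs a
   second instruction; the exact expansion is handled elsewhere, in
   spu_split_immediate.  */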
static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val)
{
  gcc_assert (val == trunc_int_for_mode (val, SImode));

  if (val >= -0x200 && val <= 0x1ff)
    return SPU_ORI;
  if (val >= 0 && val <= 0xffff)
    return SPU_IOHL;
  if ((val & 0xffff) == ((val >> 16) & 0xffff))
    {
      val = trunc_int_for_mode (val, HImode);
      if (val >= -0x200 && val <= 0x1ff)
	return SPU_ORHI;
      if ((val & 0xff) == ((val >> 8) & 0xff))
	{
	  val = trunc_int_for_mode (val, QImode);
	  if (val >= -0x200 && val <= 0x1ff)
	    return SPU_ORBI;
	}
    }
  return SPU_NONE;
}
/* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
   CONST_DOUBLEs.  */
static int
const_vector_immediate_p (rtx x)
{
  int i;
  gcc_assert (GET_CODE (x) == CONST_VECTOR);
  for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
    if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
	&& GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
      return 0;
  return 1;
}
int
logical_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  i = which_logical_immediate (val);
  return i != SPU_NONE && i != SPU_IOHL;
}
int
iohl_immediate_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  /* Check that bytes are repeated.  */
  for (i = 4; i < 16; i += 4)
    for (j = 0; j < 4; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
  val = trunc_int_for_mode (val, SImode);

  return val >= 0 && val <= 0xffff;
}
int
arith_immediate_p (rtx op, enum machine_mode mode,
		   HOST_WIDE_INT low, HOST_WIDE_INT high)
{
  HOST_WIDE_INT val;
  unsigned char arr[16];
  int bytes, i, j;

  gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
	      || GET_CODE (op) == CONST_VECTOR);

  if (GET_CODE (op) == CONST_VECTOR
      && !const_vector_immediate_p (op))
    return 0;

  if (GET_MODE (op) != VOIDmode)
    mode = GET_MODE (op);

  constant_to_array (mode, op, arr);

  if (VECTOR_MODE_P (mode))
    mode = GET_MODE_INNER (mode);

  bytes = GET_MODE_SIZE (mode);
  mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);

  /* Check that bytes are repeated.  */
  for (i = bytes; i < 16; i += bytes)
    for (j = 0; j < bytes; j++)
      if (arr[j] != arr[i + j])
	return 0;

  val = arr[0];
  for (j = 1; j < bytes; j++)
    val = (val << 8) | arr[j];

  val = trunc_int_for_mode (val, mode);

  return val >= low && val <= high;
}
/* We accept:
   - any 32-bit constant (SImode, SFmode)
   - any constant that can be generated with fsmbi (any mode)
   - a 64-bit constant where the high and low bits are identical
     (DImode, DFmode)
   - a 128-bit constant where the four 32-bit words match.  */
int
spu_legitimate_constant_p (rtx x)
{
  if (GET_CODE (x) == HIGH)
    x = XEXP (x, 0);
  /* V4SI with all identical symbols is valid.  */
  if (!flag_pic
      && GET_MODE (x) == V4SImode
      && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
	  || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
    return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
      && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
      && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);

  if (GET_CODE (x) == CONST_VECTOR
      && !const_vector_immediate_p (x))
    return 0;
  return 1;
}
/* Valid addresses are:
   - symbol_ref, label_ref, const
   - reg
   - reg + const, where either reg or const is 16 byte aligned
   - reg + reg, alignment doesn't matter
   The alignment matters in the reg+const case because lqd and stqd
   ignore the 4 least significant bits of the const.  (TODO: It might be
   preferable to allow any alignment and fix it up when splitting.)  */
int
spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
			rtx x, int reg_ok_strict)
{
  if (mode == TImode && GET_CODE (x) == AND
      && GET_CODE (XEXP (x, 1)) == CONST_INT
      && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
    x = XEXP (x, 0);
  switch (GET_CODE (x))
    {
    case SYMBOL_REF:
    case LABEL_REF:
      return !TARGET_LARGE_MEM;

    case CONST:
      if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
	{
	  rtx sym = XEXP (XEXP (x, 0), 0);
	  rtx cst = XEXP (XEXP (x, 0), 1);

	  /* Accept any symbol_ref + constant, assuming it does not
	     wrap around the local store addressability limit.  */
	  if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
	    return 1;
	}
      return 0;

    case CONST_INT:
      return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;

    case SUBREG:
      x = XEXP (x, 0);
      gcc_assert (GET_CODE (x) == REG);
      /* Fall through.  */

    case REG:
      return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);

    case PLUS:
      {
	rtx op0 = XEXP (x, 0);
	rtx op1 = XEXP (x, 1);
	if (GET_CODE (op0) == SUBREG)
	  op0 = XEXP (op0, 0);
	if (GET_CODE (op1) == SUBREG)
	  op1 = XEXP (op1, 0);
	/* We can't just accept any aligned register because CSE can
	   change it to a register that is not marked aligned and then
	   recog will fail.   So we only accept frame registers because
	   they will only be changed to other frame registers.  */
	if (GET_CODE (op0) == REG
	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
	    && GET_CODE (op1) == CONST_INT
	    && INTVAL (op1) >= -0x2000
	    && INTVAL (op1) <= 0x1fff
	    && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
	  return 1;
	if (GET_CODE (op0) == REG
	    && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
	    && GET_CODE (op1) == REG
	    && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
	  return 1;
      }
      break;

    default:
      break;
    }
  return 0;
}
/* When the address is reg + const_int, force the const_int into a
   register.  */
rtx
spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
			enum machine_mode mode)
{
  rtx op0, op1;
  /* Make sure both operands are registers.  */
  if (GET_CODE (x) == PLUS)
    {
      op0 = XEXP (x, 0);
      op1 = XEXP (x, 1);
      if (ALIGNED_SYMBOL_REF_P (op0))
	{
	  op0 = force_reg (Pmode, op0);
	  mark_reg_pointer (op0, 128);
	}
      else if (GET_CODE (op0) != REG)
	op0 = force_reg (Pmode, op0);
      if (ALIGNED_SYMBOL_REF_P (op1))
	{
	  op1 = force_reg (Pmode, op1);
	  mark_reg_pointer (op1, 128);
	}
      else if (GET_CODE (op1) != REG)
	op1 = force_reg (Pmode, op1);
      x = gen_rtx_PLUS (Pmode, op0, op1);
      if (spu_legitimate_address (mode, x, 0))
	return x;
    }
  return NULL_RTX;
}
/* Handle an attribute requiring a FUNCTION_DECL; arguments as in
   struct attribute_spec.handler.  */
static tree
spu_handle_fndecl_attribute (tree * node,
			     tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  if (TREE_CODE (*node) != FUNCTION_DECL)
    {
      warning (0, "`%s' attribute only applies to functions",
	       IDENTIFIER_POINTER (name));
      *no_add_attrs = true;
    }

  return NULL_TREE;
}
/* Handle the "vector" attribute.  */
static tree
spu_handle_vector_attribute (tree * node, tree name,
			     tree args ATTRIBUTE_UNUSED,
			     int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
{
  tree type = *node, result = NULL_TREE;
  enum machine_mode mode;
  int unsigned_p;

  while (POINTER_TYPE_P (type)
	 || TREE_CODE (type) == FUNCTION_TYPE
	 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
    type = TREE_TYPE (type);

  mode = TYPE_MODE (type);

  unsigned_p = TYPE_UNSIGNED (type);
  switch (mode)
    {
    case DImode:
      result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
      break;
    case SImode:
      result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
      break;
    case HImode:
      result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
      break;
    case QImode:
      result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
      break;
    case SFmode:
      result = V4SF_type_node;
      break;
    case DFmode:
      result = V2DF_type_node;
      break;
    default:
      break;
    }

  /* Propagate qualifiers attached to the element type
     onto the vector type.  */
  if (result && result != type && TYPE_QUALS (type))
    result = build_qualified_type (result, TYPE_QUALS (type));

  *no_add_attrs = true;		/* No need to hang on to the attribute.  */

  if (!result)
    warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
  else
    *node = lang_hooks.types.reconstruct_complex_type (*node, result);

  return NULL_TREE;
}
/* Return nonzero if FUNC is a naked function.  */
static int
spu_naked_function_p (tree func)
{
  tree a;

  if (TREE_CODE (func) != FUNCTION_DECL)
    abort ();

  a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
  return a != NULL_TREE;
}
int
spu_initial_elimination_offset (int from, int to)
{
  int saved_regs_size = spu_saved_regs_size ();
  int sp_offset = 0;
  if (!current_function_is_leaf || crtl->outgoing_args_size
      || get_frame_size () || saved_regs_size)
    sp_offset = STACK_POINTER_OFFSET;
  if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return (sp_offset + crtl->outgoing_args_size);
  else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return 0;
  else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
    return sp_offset + crtl->outgoing_args_size
      + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
  else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
    return get_frame_size () + saved_regs_size + sp_offset;
  return 0;
}
rtx
spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
{
  enum machine_mode mode = TYPE_MODE (type);
  int byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* Make sure small structs are left justified in a register.  */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
    {
      enum machine_mode smode;
      rtvec v;
      int i;
      int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
      int n = byte_size / UNITS_PER_WORD;
      v = rtvec_alloc (nregs);
      for (i = 0; i < n; i++)
	{
	  RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
						gen_rtx_REG (TImode,
							     FIRST_RETURN_REGNUM
							     + i),
						GEN_INT (UNITS_PER_WORD * i));
	  byte_size -= UNITS_PER_WORD;
	}

      if (n < nregs)
	{
	  smode =
	    smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
	  RTVEC_ELT (v, n) =
	    gen_rtx_EXPR_LIST (VOIDmode,
			       gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
			       GEN_INT (UNITS_PER_WORD * n));
	}
      return gen_rtx_PARALLEL (mode, v);
    }
  return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
}
rtx
spu_function_arg (CUMULATIVE_ARGS cum,
		  enum machine_mode mode,
		  tree type, int named ATTRIBUTE_UNUSED)
{
  int byte_size;

  if (cum >= MAX_REGISTER_ARGS)
    return 0;

  byte_size = ((mode == BLKmode)
	       ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));

  /* The ABI does not allow parameters to be passed partially in
     reg and partially in stack.  */
  if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
    return 0;

  /* Make sure small structs are left justified in a register.  */
  if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
      && byte_size < UNITS_PER_WORD && byte_size > 0)
    {
      enum machine_mode smode;
      rtx gr_reg;
      if (byte_size < 4)
	byte_size = 4;
      smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
      gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				  gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
				  const0_rtx);
      return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
    }
  else
    return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
}
/* Variable sized types are passed by reference.  */
static bool
spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
		       enum machine_mode mode ATTRIBUTE_UNUSED,
		       const_tree type, bool named ATTRIBUTE_UNUSED)
{
  return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
}
/* Create and return the va_list datatype.

   On SPU, va_list is an array type equivalent to

      typedef struct __va_list_tag
        {
	    void *__args __attribute__((__aligned(16)));
	    void *__skip __attribute__((__aligned(16)));
        } va_list[1];

   where __args points to the arg that will be returned by the next
   va_arg(), and __skip points to the previous stack frame such that
   when __args == __skip we should advance __args by 32 bytes.  */
static tree
spu_build_builtin_va_list (void)
{
  tree f_args, f_skip, record, type_decl;
  bool owp;

  record = (*lang_hooks.types.make_type) (RECORD_TYPE);

  type_decl =
    build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);

  f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
  f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);

  DECL_FIELD_CONTEXT (f_args) = record;
  DECL_ALIGN (f_args) = 128;
  DECL_USER_ALIGN (f_args) = 1;

  DECL_FIELD_CONTEXT (f_skip) = record;
  DECL_ALIGN (f_skip) = 128;
  DECL_USER_ALIGN (f_skip) = 1;

  TREE_CHAIN (record) = type_decl;
  TYPE_NAME (record) = type_decl;
  TYPE_FIELDS (record) = f_args;
  TREE_CHAIN (f_args) = f_skip;

  /* We know this is being padded and we want it too.  It is an internal
     type so hide the warnings from the user.  */
  owp = warn_padded;
  warn_padded = false;

  layout_type (record);

  warn_padded = owp;

  /* The correct type is an array type of one element.  */
  return build_array_type (record, build_index_type (size_zero_node));
}
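
/* Resulting layout (a sketch; the offsets follow from the 128-bit field
   alignment requested above): __args at byte offset 0, __skip at byte
   offset 16, so sizeof (struct __va_list_tag) is 32 including padding.  */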
/* Implement va_start by filling the va_list structure VALIST.
   NEXTARG points to the first anonymous stack argument.

   The following global variables are used to initialize
   the va_list structure:

     crtl->args.info;
       the CUMULATIVE_ARGS for this function

     crtl->args.arg_offset_rtx:
       holds the offset of the first anonymous stack argument
       (relative to the virtual arg pointer).  */

void
spu_va_start (tree valist, rtx nextarg)
{
  tree f_args, f_skip;
  tree args, skip, t;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = TREE_CHAIN (f_args);

  valist = build_va_arg_indirect_ref (valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  /* Find the __args area.  */
  t = make_tree (TREE_TYPE (args), nextarg);
  if (crtl->args.pretend_args_size > 0)
    t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
		size_int (-STACK_POINTER_OFFSET));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);

  /* Find the __skip area.  */
  t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
  t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
	      size_int (crtl->args.pretend_args_size
			- STACK_POINTER_OFFSET));
  t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
  TREE_SIDE_EFFECTS (t) = 1;
  expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
}
/* Gimplify va_arg by updating the va_list structure
   VALIST as required to retrieve an argument of type
   TYPE, and returning that argument.

   ret = va_arg(VALIST, TYPE);

   generates code equivalent to:

    paddedsize = (sizeof(TYPE) + 15) & -16;
    if (VALIST.__args + paddedsize > VALIST.__skip
	&& VALIST.__args <= VALIST.__skip)
      addr = VALIST.__skip + 32;
    else
      addr = VALIST.__args;
    VALIST.__args = addr + paddedsize;
    ret = *(TYPE *)addr;  */
static tree
spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
			  tree * post_p ATTRIBUTE_UNUSED)
{
  tree f_args, f_skip;
  tree args, skip;
  HOST_WIDE_INT size, rsize;
  tree paddedsize, addr, tmp;
  bool pass_by_reference_p;

  f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
  f_skip = TREE_CHAIN (f_args);

  valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
  args =
    build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
  skip =
    build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);

  addr = create_tmp_var (ptr_type_node, "va_arg");
  DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();

  /* if an object is dynamically sized, a pointer to it is passed
     instead of the object itself.  */
  pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
					       false);
  if (pass_by_reference_p)
    type = build_pointer_type (type);
  size = int_size_in_bytes (type);
  rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;

  /* build conditional expression to calculate addr.  The expression
     will be gimplified later.  */
  paddedsize = size_int (rsize);
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
  tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
		build2 (GT_EXPR, boolean_type_node, tmp, skip),
		build2 (LE_EXPR, boolean_type_node, args, skip));

  tmp = build3 (COND_EXPR, ptr_type_node, tmp,
		build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
			size_int (32)), args);

  tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
  gimplify_and_add (tmp, pre_p);

  /* update VALIST.__args */
  tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
  tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
  gimplify_and_add (tmp, pre_p);

  addr = fold_convert (build_pointer_type (type), addr);

  if (pass_by_reference_p)
    addr = build_va_arg_indirect_ref (addr);

  return build_va_arg_indirect_ref (addr);
}
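
/* Worked example (illustrative): UNITS_PER_WORD is 16 on SPU, so for a
   20-byte struct rsize rounds up to 32 and __args advances by 32.  The
   GT/LE pair above detects the single case where that advance would step
   across __skip, i.e. off the end of the register save area; the argument
   is then fetched from __skip + 32 in the caller's frame instead.  */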
/* Save parameter registers starting with the register that corresponds
   to the first unnamed parameters.  If the first unnamed parameter is
   in the stack then save no registers.  Set pretend_args_size to the
   amount of space needed to save the registers.  */
static void
spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
			    tree type, int *pretend_size, int no_rtl)
{
  if (!no_rtl)
    {
      rtx tmp;
      int regno;
      int offset;
      int ncum = *cum;

      /* cum currently points to the last named argument, we want to
         start at the next argument.  */
      FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);

      offset = -STACK_POINTER_OFFSET;
      for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
	{
	  tmp = gen_frame_mem (V4SImode,
			       plus_constant (virtual_incoming_args_rtx,
					      offset));
	  emit_move_insn (tmp,
			  gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
	  offset += 16;
	}
      *pretend_size = offset + STACK_POINTER_OFFSET;
    }
}
void
spu_conditional_register_usage (void)
{
  if (flag_pic)
    {
      fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
      call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
    }
}
/* This is called to decide when we can simplify a load instruction.  We
   must only return true for registers which we know will always be
   aligned.  Taking into account that CSE might replace this reg with
   another one that has not been marked aligned.
   So this is really only true for frame, stack and virtual registers,
   which we know are always aligned and should not be adversely effected
   by CSE.  */
static int
regno_aligned_for_load (int regno)
{
  return regno == FRAME_POINTER_REGNUM
    || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
    || regno == STACK_POINTER_REGNUM
    || (regno >= FIRST_VIRTUAL_REGISTER
	&& regno <= LAST_VIRTUAL_REGISTER);
}
/* Return TRUE when mem is known to be 16-byte aligned.  */
int
aligned_mem_p (rtx mem)
{
  if (MEM_ALIGN (mem) >= 128)
    return 1;
  if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
    return 1;
  if (GET_CODE (XEXP (mem, 0)) == PLUS)
    {
      rtx p0 = XEXP (XEXP (mem, 0), 0);
      rtx p1 = XEXP (XEXP (mem, 0), 1);
      if (regno_aligned_for_load (REGNO (p0)))
	{
	  if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
	    return 1;
	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
	    return 1;
	}
    }
  else if (GET_CODE (XEXP (mem, 0)) == REG)
    {
      if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
	return 1;
    }
  else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
    return 1;
  else if (GET_CODE (XEXP (mem, 0)) == CONST)
    {
      rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
      rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
      if (GET_CODE (p0) == SYMBOL_REF
	  && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
	return 1;
    }
  return 0;
}
/* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
   into its SYMBOL_REF_FLAGS.  */
static void
spu_encode_section_info (tree decl, rtx rtl, int first)
{
  default_encode_section_info (decl, rtl, first);

  /* If a variable has a forced alignment to < 16 bytes, mark it with
     SYMBOL_FLAG_ALIGN1.  */
  if (TREE_CODE (decl) == VAR_DECL
      && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
    SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
}
/* Return TRUE if we are certain the mem refers to a complete object
   which is both 16-byte aligned and padded to a 16-byte boundary.  This
   would make it safe to store with a single instruction.
   We guarantee the alignment and padding for static objects by aligning
   all of them to 16-bytes.  (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
   FIXME: We currently cannot guarantee this for objects on the stack
   because assign_parm_setup_stack calls assign_stack_local with the
   alignment of the parameter mode and in that case the alignment never
   gets adjusted by LOCAL_ALIGNMENT.  */
static int
store_with_one_insn_p (rtx mem)
{
  rtx addr = XEXP (mem, 0);
  if (GET_MODE (mem) == BLKmode)
    return 0;
  /* Only static objects.  */
  if (GET_CODE (addr) == SYMBOL_REF)
    {
      /* We use the associated declaration to make sure the access is
         referring to the whole object.
         We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
         if it is necessary.  Will there be cases where one exists, and
         the other does not?  Will there be cases where both exist, but
         have different types?  */
      tree decl = MEM_EXPR (mem);
      if (decl
	  && TREE_CODE (decl) == VAR_DECL
	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
	return 1;
      decl = SYMBOL_REF_DECL (addr);
      if (decl
	  && TREE_CODE (decl) == VAR_DECL
	  && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
	return 1;
    }
  return 0;
}
int
spu_expand_mov (rtx * ops, enum machine_mode mode)
{
  if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
    abort ();

  if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
    {
      rtx from = SUBREG_REG (ops[1]);
      enum machine_mode imode = GET_MODE (from);

      gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
		  && GET_MODE_CLASS (imode) == MODE_INT
		  && subreg_lowpart_p (ops[1]));

      if (GET_MODE_SIZE (imode) < 4)
	{
	  from = gen_rtx_SUBREG (SImode, from, 0);
	  imode = SImode;
	}

      if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
	{
	  enum insn_code icode =
	    convert_optab_handler (trunc_optab, mode, imode)->insn_code;
	  emit_insn (GEN_FCN (icode) (ops[0], from));
	}
      else
	emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
      return 1;
    }

  /* At least one of the operands needs to be a register.  */
  if ((reload_in_progress | reload_completed) == 0
      && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    {
      rtx temp = force_reg (mode, ops[1]);
      emit_move_insn (ops[0], temp);
      return 1;
    }
  if (reload_in_progress || reload_completed)
    {
      if (CONSTANT_P (ops[1]))
	return spu_split_immediate (ops);
      return 0;
    }
  else
    {
      if (GET_CODE (ops[0]) == MEM)
	{
	  if (!spu_valid_move (ops))
	    {
	      emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
				    gen_reg_rtx (TImode)));
	      return 1;
	    }
	}
      else if (GET_CODE (ops[1]) == MEM)
	{
	  if (!spu_valid_move (ops))
	    {
	      emit_insn (gen_load
			 (ops[0], ops[1], gen_reg_rtx (TImode),
			  gen_reg_rtx (SImode)));
	      return 1;
	    }
	}
      /* Catch the SImode immediates greater than 0x7fffffff, and sign
         extend them.  */
      if (GET_CODE (ops[1]) == CONST_INT)
	{
	  HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
	  if (val != INTVAL (ops[1]))
	    {
	      emit_move_insn (ops[0], GEN_INT (val));
	      return 1;
	    }
	}
    }
  return 0;
}
/* For now, only frame registers are known to be aligned at all times.
   We can't trust REGNO_POINTER_ALIGN because optimization will move
   registers around, potentially changing an "aligned" register in an
   address to an unaligned register, which would result in an invalid
   address.  */
static int
reg_align (rtx reg)
{
  int regno = REGNO (reg);
  return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
}
void
spu_split_load (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx addr, load, rot, mem, p0, p1;
  int rot_amt;

  addr = XEXP (ops[1], 0);

  rot = 0;
  rot_amt = 0;
  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx
         aligned reg   + unaligned reg   => lqx, rotqby
         aligned reg   + aligned const   => lqd
         aligned reg   + unaligned const => lqd, rotqbyi
         unaligned reg + aligned reg     => lqx, rotqby
         unaligned reg + unaligned reg   => lqx, a, rotqby (1 scratch)
         unaligned reg + aligned const   => lqd, rotqby
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      p0 = XEXP (addr, 0);
      p1 = XEXP (addr, 1);
      if (reg_align (p0) < 128)
	{
	  if (GET_CODE (p1) == REG && reg_align (p1) < 128)
	    {
	      emit_insn (gen_addsi3 (ops[3], p0, p1));
	      rot = ops[3];
	    }
	  else
	    rot = p0;
	}
      else
	{
	  if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
	    {
	      rot_amt = INTVAL (p1) & 15;
	      p1 = GEN_INT (INTVAL (p1) & -16);
	      addr = gen_rtx_PLUS (SImode, p0, p1);
	    }
	  else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
	    rot = p1;
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      if (reg_align (addr) < 128)
	rot = addr;
    }
  else if (GET_CODE (addr) == CONST)
    {
      if (GET_CODE (XEXP (addr, 0)) == PLUS
	  && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
	  && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
	{
	  rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
	  if (rot_amt & 15)
	    addr = gen_rtx_CONST (Pmode,
				  gen_rtx_PLUS (Pmode,
						XEXP (XEXP (addr, 0), 0),
						GEN_INT (rot_amt & -16)));
	  else
	    addr = XEXP (XEXP (addr, 0), 0);
	}
      else
	rot = addr;
    }
  else if (GET_CODE (addr) == CONST_INT)
    {
      rot_amt = INTVAL (addr);
      addr = GEN_INT (rot_amt & -16);
    }
  else if (!ALIGNED_SYMBOL_REF_P (addr))
    rot = addr;

  if (GET_MODE_SIZE (mode) < 4)
    rot_amt += GET_MODE_SIZE (mode) - 4;

  rot_amt &= 15;

  if (rot && rot_amt)
    {
      emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
      rot = ops[3];
      rot_amt = 0;
    }

  load = ops[2];

  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
  mem = change_address (ops[1], TImode, addr);

  emit_insn (gen_movti (load, mem));

  if (rot)
    emit_insn (gen_rotqby_ti (load, load, rot));
  else if (rot_amt)
    emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));

  if (reload_completed)
    emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
  else
    emit_insn (gen_spu_convert (ops[0], load));
}
void
spu_split_store (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  rtx pat = ops[2];
  rtx reg = ops[3];
  rtx addr, p0, p1, p1_lo, smem;
  int aform;
  int scalar;

  addr = XEXP (ops[0], 0);

  if (GET_CODE (addr) == PLUS)
    {
      /* 8 cases:
         aligned reg   + aligned reg     => lqx, c?x, shuf, stqx
         aligned reg   + unaligned reg   => lqx, c?x, shuf, stqx
         aligned reg   + aligned const   => lqd, c?d, shuf, stqx
         aligned reg   + unaligned const => lqd, c?d, shuf, stqx
         unaligned reg + aligned reg     => lqx, c?x, shuf, stqx
         unaligned reg + unaligned reg   => lqx, c?x, shuf, stqx
         unaligned reg + aligned const   => lqd, c?d, shuf, stqx
         unaligned reg + unaligned const -> not allowed by legitimate address
       */
      aform = 0;
      p0 = XEXP (addr, 0);
      p1 = p1_lo = XEXP (addr, 1);
      if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
	{
	  p1_lo = GEN_INT (INTVAL (p1) & 15);
	  p1 = GEN_INT (INTVAL (p1) & -16);
	  addr = gen_rtx_PLUS (SImode, p0, p1);
	}
    }
  else if (GET_CODE (addr) == REG)
    {
      aform = 0;
      p0 = addr;
      p1 = p1_lo = const0_rtx;
    }
  else
    {
      aform = 1;
      p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
      p1 = 0;			/* aform doesn't use p1 */
      p1_lo = addr;
      if (ALIGNED_SYMBOL_REF_P (addr))
	p1_lo = const0_rtx;
      else if (GET_CODE (addr) == CONST)
	{
	  if (GET_CODE (XEXP (addr, 0)) == PLUS
	      && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
	      && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
	      if (v & -16)
		addr = gen_rtx_CONST (Pmode,
				      gen_rtx_PLUS (Pmode,
						    XEXP (XEXP (addr, 0), 0),
						    GEN_INT (v & -16)));
	      else
		addr = XEXP (XEXP (addr, 0), 0);
	      p1_lo = GEN_INT (v & 15);
	    }
	}
      else if (GET_CODE (addr) == CONST_INT)
	{
	  p1_lo = GEN_INT (INTVAL (addr) & 15);
	  addr = GEN_INT (INTVAL (addr) & -16);
	}
    }

  addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));

  scalar = store_with_one_insn_p (ops[0]);
  if (!scalar)
    {
      /* We could copy the flags from the ops[0] MEM to mem here,
         We don't because we want this load to be optimized away if
         possible, and copying the flags will prevent that in certain
         cases, e.g. consider the volatile flag.  */

      rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
      set_mem_alias_set (lmem, 0);
      emit_insn (gen_movti (reg, lmem));

      if (!p0 || reg_align (p0) >= 128)
	p0 = stack_pointer_rtx;
      if (!p1_lo)
	p1_lo = const0_rtx;

      emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
      emit_insn (gen_shufb (reg, ops[1], reg, pat));
    }
  else if (reload_completed)
    {
      if (GET_CODE (ops[1]) == REG)
	emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
      else if (GET_CODE (ops[1]) == SUBREG)
	emit_move_insn (reg,
			gen_rtx_REG (GET_MODE (reg),
				     REGNO (SUBREG_REG (ops[1]))));
      else
	abort ();
    }
  else
    {
      if (GET_CODE (ops[1]) == REG)
	emit_insn (gen_spu_convert (reg, ops[1]));
      else if (GET_CODE (ops[1]) == SUBREG)
	emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
      else
	abort ();
    }

  if (GET_MODE_SIZE (mode) < 4 && scalar)
    emit_insn (gen_shlqby_ti
	       (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));

  smem = change_address (ops[0], TImode, addr);
  /* We can't use the previous alias set because the memory has changed
     size and can potentially overlap objects of other types.  */
  set_mem_alias_set (smem, 0);

  emit_insn (gen_movti (smem, reg));
}
/* Return TRUE if X is MEM which is a struct member reference
   and the member can safely be loaded and stored with a single
   instruction because it is padded.  */
static int
mem_is_padded_component_ref (rtx x)
{
  tree t = MEM_EXPR (x);
  tree r;
  if (!t || TREE_CODE (t) != COMPONENT_REF)
    return 0;
  t = TREE_OPERAND (t, 1);
  if (!t || TREE_CODE (t) != FIELD_DECL
      || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
    return 0;
  /* Only do this for RECORD_TYPEs, not UNION_TYPEs.  */
  r = DECL_FIELD_CONTEXT (t);
  if (!r || TREE_CODE (r) != RECORD_TYPE)
    return 0;
  /* Make sure they are the same mode */
  if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
    return 0;
  /* If there are no following fields then the field alignment assures
     the structure is padded to the alignment which means this field is
     padded too.  */
  if (TREE_CHAIN (t) == 0)
    return 1;
  /* If the following field is also aligned then this field will be
     padded.  */
  t = TREE_CHAIN (t);
  if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
    return 1;
  return 0;
}
/* Parse the -mfixed-range= option string.  */
static void
fix_range (const char *const_str)
{
  int i, first, last;
  char *str, *dash, *comma;

  /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
     REG2 are either register names or register numbers.  The effect
     of this option is to mark the registers in the range from REG1 to
     REG2 as ``fixed'' so they won't be used by the compiler.  */

  i = strlen (const_str);
  str = (char *) alloca (i + 1);
  memcpy (str, const_str, i + 1);

  while (1)
    {
      dash = strchr (str, '-');
      if (!dash)
	{
	  warning (0, "value of -mfixed-range must have form REG1-REG2");
	  return;
	}
      *dash = '\0';
      comma = strchr (dash + 1, ',');
      if (comma)
	*comma = '\0';

      first = decode_reg_name (str);
      if (first < 0)
	{
	  warning (0, "unknown register name: %s", str);
	  return;
	}

      last = decode_reg_name (dash + 1);
      if (last < 0)
	{
	  warning (0, "unknown register name: %s", dash + 1);
	  return;
	}

      *dash = '-';

      if (first > last)
	{
	  warning (0, "%s-%s is an empty range", str, dash + 1);
	  return;
	}

      for (i = first; i <= last; ++i)
	fixed_regs[i] = call_used_regs[i] = 1;

      if (!comma)
	break;

      *comma = ',';
      str = comma + 1;
    }
}
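
/* Usage example (illustrative): -mfixed-range=80-100 marks registers 80
   through 100 as fixed and call-used so the compiler will not allocate
   them; -mfixed-range=80-85,90-95 fixes two separate ranges.  */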
int
spu_valid_move (rtx * ops)
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
    return 0;

  /* init_expr_once tries to recog against load and store insns to set
     the direct_load[] and direct_store[] arrays.  We always want to
     consider those loads and stores valid.  init_expr_once is called in
     the context of a dummy function which does not have a decl.  */
  if (cfun->decl == 0)
    return 1;

  /* Don't allow loads/stores which would require more than 1 insn.
     During and after reload we assume loads and stores only take 1
     insn.  */
  if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
    {
      if (GET_CODE (ops[0]) == MEM
	  && (GET_MODE_SIZE (mode) < 4
	      || !(store_with_one_insn_p (ops[0])
		   || mem_is_padded_component_ref (ops[0]))))
	return 0;
      if (GET_CODE (ops[1]) == MEM
	  && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
	return 0;
    }
  return 1;
}
/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the fsmbi instruction.  */
int
fsmbi_const_p (rtx x)
{
  if (CONSTANT_P (x))
    {
      /* We can always choose TImode for CONST_INT because the high bits
         of an SImode will always be all 1s, i.e., valid for fsmbi.  */
      enum immediate_class c = classify_immediate (x, TImode);
      return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
    }
  return 0;
}

/* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
   can be generated using the cbd, chd, cwd or cdd instruction.  */
int
cpat_const_p (rtx x, enum machine_mode mode)
{
  if (CONSTANT_P (x))
    {
      enum immediate_class c = classify_immediate (x, mode);
      return c == IC_CPAT;
    }
  return 0;
}
rtx
gen_cpat_const (rtx * ops)
{
  unsigned char dst[16];
  int i, offset, shift, isize;
  if (GET_CODE (ops[3]) != CONST_INT
      || GET_CODE (ops[2]) != CONST_INT
      || (GET_CODE (ops[1]) != CONST_INT
	  && GET_CODE (ops[1]) != REG))
    return 0;
  if (GET_CODE (ops[1]) == REG
      && (!REG_POINTER (ops[1])
	  || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
    return 0;

  for (i = 0; i < 16; i++)
    dst[i] = i + 16;
  isize = INTVAL (ops[3]);
  if (isize == 1)
    shift = 3;
  else if (isize == 2)
    shift = 2;
  else
    shift = 0;
  offset = (INTVAL (ops[2]) +
	    (GET_CODE (ops[1]) ==
	     CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
  for (i = 0; i < isize; i++)
    dst[offset + i] = i + shift;
  return array_to_constant (TImode, dst);
}
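
/* Example (illustrative): isize == 2 gives shift == 2, so a halfword slot
   at offset 6 produces
     { 16,17,18,19,20,21, 2,3, 24,25,26,27,28,29,30,31 }
   the same control word a chd instruction would compute for that
   address.  */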
/* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
   array.  Use MODE for CONST_INT's.  When the constant's mode is smaller
   than 16 bytes, the value is repeated across the rest of the array.  */
static void
constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
{
  HOST_WIDE_INT val;
  int i, j, first;

  memset (arr, 0, 16);
  mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
  if (GET_CODE (x) == CONST_INT
      || (GET_CODE (x) == CONST_DOUBLE
	  && (mode == SFmode || mode == DFmode)))
    {
      gcc_assert (mode != VOIDmode && mode != BLKmode);

      if (GET_CODE (x) == CONST_DOUBLE)
	val = const_double_to_hwint (x);
      else
	val = INTVAL (x);
      first = GET_MODE_SIZE (mode) - 1;
      for (i = first; i >= 0; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
      /* Splat the constant across the whole array.  */
      for (j = 0, i = first + 1; i < 16; i++)
	{
	  arr[i] = arr[j];
	  j = (j == first) ? 0 : j + 1;
	}
    }
  else if (GET_CODE (x) == CONST_DOUBLE)
    {
      val = CONST_DOUBLE_LOW (x);
      for (i = 15; i >= 8; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
      val = CONST_DOUBLE_HIGH (x);
      for (i = 7; i >= 0; i--)
	{
	  arr[i] = val & 0xff;
	  val >>= 8;
	}
    }
  else if (GET_CODE (x) == CONST_VECTOR)
    {
      int units;
      rtx elt;
      mode = GET_MODE_INNER (mode);
      units = CONST_VECTOR_NUNITS (x);
      for (i = 0; i < units; i++)
	{
	  elt = CONST_VECTOR_ELT (x, i);
	  if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
	    {
	      if (GET_CODE (elt) == CONST_DOUBLE)
		val = const_double_to_hwint (elt);
	      else
		val = INTVAL (elt);
	      first = GET_MODE_SIZE (mode) - 1;
	      if (first + i * GET_MODE_SIZE (mode) > 16)
		abort ();
	      for (j = first; j >= 0; j--)
		{
		  arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
		  val >>= 8;
		}
	    }
	}
    }
}
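
/* Example (illustrative): a QImode 0xab yields sixteen 0xab bytes; an
   HImode 0x1234 yields { 0x12,0x34, 0x12,0x34, ... }, the big-endian
   byte image repeated across the whole 16-byte array.  */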
/* Convert a 16 byte array to a constant of mode MODE.  When MODE is
   smaller than 16 bytes, use the bytes that would represent that value
   in a register, e.g., for QImode return the value of arr[3].  */
rtx
array_to_constant (enum machine_mode mode, unsigned char arr[16])
{
  enum machine_mode inner_mode;
  rtvec v;
  int units, size, i, j, k;
  HOST_WIDE_INT val;

  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
    {
      j = GET_MODE_SIZE (mode);
      i = j < 4 ? 4 - j : 0;
      for (val = 0; i < j; i++)
	val = (val << 8) | arr[i];
      val = trunc_int_for_mode (val, mode);
      return GEN_INT (val);
    }

  if (mode == TImode)
    {
      HOST_WIDE_INT high;
      for (i = high = 0; i < 8; i++)
	high = (high << 8) | arr[i];
      for (i = 8, val = 0; i < 16; i++)
	val = (val << 8) | arr[i];
      return immed_double_const (val, high, TImode);
    }
  if (mode == SFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val = trunc_int_for_mode (val, SImode);
      return hwint_to_const_double (SFmode, val);
    }
  if (mode == DFmode)
    {
      val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
      val <<= 32;
      val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
      return hwint_to_const_double (DFmode, val);
    }

  if (!VECTOR_MODE_P (mode))
    abort ();

  units = GET_MODE_NUNITS (mode);
  size = GET_MODE_UNIT_SIZE (mode);
  inner_mode = GET_MODE_INNER (mode);
  v = rtvec_alloc (units);

  for (k = i = 0; i < units; ++i)
    {
      val = 0;
      for (j = 0; j < size; j++, k++)
	val = (val << 8) | arr[k];

      if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
	RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
      else
	RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
    }

  return gen_rtx_CONST_VECTOR (mode, v);
}
static void
reloc_diagnostic (rtx x)
{
  tree loc_decl, decl = 0;
  const char *msg;
  if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
    return;

  if (GET_CODE (x) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (x);
  else if (GET_CODE (x) == CONST
	   && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
    decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));

  /* SYMBOL_REF_DECL is not necessarily a DECL.  */
  if (decl && !DECL_P (decl))
    decl = 0;

  /* We use last_assemble_variable_decl to get line information.  It's
     not always going to be right and might not even be close, but will
     be right for the more common cases.  */
  if (!last_assemble_variable_decl || in_section == ctors_section)
    loc_decl = decl;
  else
    loc_decl = last_assemble_variable_decl;

  /* The decl could be a string constant.  */
  if (decl && DECL_P (decl))
    msg = "%Jcreating run-time relocation for %qD";
  else
    msg = "creating run-time relocation";

  if (TARGET_WARN_RELOC)
    warning (0, msg, loc_decl, decl);
  else
    error (msg, loc_decl, decl);
}
/* Hook into assemble_integer so we can generate an error for run-time
   relocations.  The SPU ABI disallows them.  */
static bool
spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
{
  /* By default run-time relocations aren't supported, but we allow them
     in case users support it in their own run-time loader.  And we provide
     a warning for those users that don't.  */
  if ((GET_CODE (x) == SYMBOL_REF)
      || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
    reloc_diagnostic (x);

  return default_assemble_integer (x, size, aligned_p);
}

static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
static bool
spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
{
  enum machine_mode mode = GET_MODE (x);
  int cost = COSTS_N_INSNS (2);

  /* Folding to a CONST_VECTOR will use extra space but there might
     be only a small savings in cycles.  We'd like to use a CONST_VECTOR
     only if it allows us to fold away multiple insns.  Changing the cost
     of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
     because this cost will only be compared against a single insn.
     if (code == CONST_VECTOR)
       return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
   */

  /* Use defaults for float operations.  Not accurate but good enough.  */
  if (mode == DFmode)
    {
      *total = COSTS_N_INSNS (13);
      return true;
    }
  if (mode == SFmode)
    {
      *total = COSTS_N_INSNS (6);
      return true;
    }
  switch (code)
    {
    case CONST_INT:
      if (satisfies_constraint_K (x))
	*total = 0;
      else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
	*total = COSTS_N_INSNS (1);
      else
	*total = COSTS_N_INSNS (3);
      return true;

    case CONST:
      *total = COSTS_N_INSNS (3);
      return true;

    case LABEL_REF:
    case SYMBOL_REF:
      *total = COSTS_N_INSNS (0);
      return true;

    case CONST_DOUBLE:
      *total = COSTS_N_INSNS (5);
      return true;

    case FLOAT_EXTEND:
    case FLOAT_TRUNCATE:
    case FLOAT:
    case UNSIGNED_FLOAT:
    case FIX:
    case UNSIGNED_FIX:
      *total = COSTS_N_INSNS (7);
      return true;

    case PLUS:
      if (mode == TImode)
	{
	  *total = COSTS_N_INSNS (9);
	  return true;
	}
      break;

    case MULT:
      cost =
	GET_CODE (XEXP (x, 0)) ==
	REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
      if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
	{
	  if (GET_CODE (XEXP (x, 1)) == CONST_INT)
	    {
	      HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
	      cost = COSTS_N_INSNS (14);
	      if ((val & 0xffff) == 0)
		cost = COSTS_N_INSNS (9);
	      else if (val > 0 && val < 0x10000)
		cost = COSTS_N_INSNS (11);
	    }
	}
      *total = cost;
      return true;

    case DIV:
    case UDIV:
    case MOD:
    case UMOD:
      *total = COSTS_N_INSNS (20);
      return true;

    case ROTATE:
    case ROTATERT:
    case ASHIFT:
    case ASHIFTRT:
    case LSHIFTRT:
      *total = COSTS_N_INSNS (4);
      return true;

    case UNSPEC:
      if (XINT (x, 1) == UNSPEC_CONVERT)
	*total = COSTS_N_INSNS (0);
      else
	*total = COSTS_N_INSNS (4);
      return true;
    }
  /* Scale cost by mode size.  Except when initializing (cfun->decl == 0).  */
  if (GET_MODE_CLASS (mode) == MODE_INT
      && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
    cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
      * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
  *total = cost;
  return true;
}
static enum machine_mode
spu_eh_return_filter_mode (void)
{
  /* We would like this to be SImode, but sjlj exceptions seem to work
     only with word_mode.  */
  return word_mode;
}

/* Decide whether we can make a sibling call to a function.  DECL is the
   declaration of the function being targeted by the call and EXP is the
   CALL_EXPR representing the call.  */
static bool
spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
{
  return decl && !TARGET_LARGE_MEM;
}
/* We need to correctly update the back chain pointer and the Available
   Stack Size (which is in the second slot of the sp register.)  */
void
spu_allocate_stack (rtx op0, rtx op1)
{
  HOST_WIDE_INT v;
  rtx chain = gen_reg_rtx (V4SImode);
  rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
  rtx sp = gen_reg_rtx (V4SImode);
  rtx splatted = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);

  /* copy the back chain so we can save it back again.  */
  emit_move_insn (chain, stack_bot);

  op1 = force_reg (SImode, op1);

  v = 0x1020300010203ll;
  emit_move_insn (pat, immed_double_const (v, v, TImode));
  emit_insn (gen_shufb (splatted, op1, op1, pat));

  emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
  emit_insn (gen_subv4si3 (sp, sp, splatted));

  if (flag_stack_check)
    {
      rtx avail = gen_reg_rtx (SImode);
      rtx result = gen_reg_rtx (SImode);
      emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
      emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
      emit_insn (gen_spu_heq (result, GEN_INT (0)));
    }

  emit_insn (gen_spu_convert (stack_pointer_rtx, sp));

  emit_move_insn (stack_bot, chain);

  emit_move_insn (op0, virtual_stack_dynamic_rtx);
}
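
/* A note on the constant above (illustrative): both halves of PAT are
   0x0001020300010203, so each word of the shufb result selects bytes 0-3
   of op1, splatting the requested size across all four slots.  The vector
   subtract then updates the stack pointer (slot 0) and the Available
   Stack Size (slot 1) in a single instruction.  */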
void
spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (SImode);
  rtx temp2 = gen_reg_rtx (SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx temp4 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  /* Restore the backchain from the first word, sp from the second.  */
  emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
  emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Compute Available Stack Size for sp */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  /* Compute Available Stack Size for back chain */
  emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
  emit_insn (gen_shufb (temp4, temp2, temp2, pat));
  emit_insn (gen_addv4si3 (temp4, sp, temp4));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
}
static void
spu_init_libfuncs (void)
{
  set_optab_libfunc (smul_optab, DImode, "__muldi3");
  set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
  set_optab_libfunc (smod_optab, DImode, "__moddi3");
  set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
  set_optab_libfunc (umod_optab, DImode, "__umoddi3");
  set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
  set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
  set_optab_libfunc (clz_optab, DImode, "__clzdi2");
  set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
  set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
  set_optab_libfunc (parity_optab, DImode, "__paritydi2");

  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");

  set_optab_libfunc (smul_optab, TImode, "__multi3");
  set_optab_libfunc (sdiv_optab, TImode, "__divti3");
  set_optab_libfunc (smod_optab, TImode, "__modti3");
  set_optab_libfunc (udiv_optab, TImode, "__udivti3");
  set_optab_libfunc (umod_optab, TImode, "__umodti3");
  set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
}
/* Make a subreg, stripping any existing subreg.  We could possibly just
   call simplify_subreg, but in this case we know what we want.  */
static rtx
spu_gen_subreg (enum machine_mode mode, rtx x)
{
  if (GET_CODE (x) == SUBREG)
    x = SUBREG_REG (x);
  if (GET_MODE (x) == mode)
    return x;
  return gen_rtx_SUBREG (mode, x, 0);
}

static bool
spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
{
  return (TYPE_MODE (type) == BLKmode
	  && ((type) == 0
	      || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
	      || int_size_in_bytes (type) >
	      (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
}
/* Create the built-in types and functions */

struct spu_builtin_description spu_builtins[] = {
#define DEF_BUILTIN(fcode, icode, name, type, params) \
  {fcode, icode, name, type, params, NULL_TREE},
#include "spu-builtins.def"
#undef DEF_BUILTIN
};
static void
spu_init_builtins (void)
{
  struct spu_builtin_description *d;
  unsigned int i;

  V16QI_type_node = build_vector_type (intQI_type_node, 16);
  V8HI_type_node = build_vector_type (intHI_type_node, 8);
  V4SI_type_node = build_vector_type (intSI_type_node, 4);
  V2DI_type_node = build_vector_type (intDI_type_node, 2);
  V4SF_type_node = build_vector_type (float_type_node, 4);
  V2DF_type_node = build_vector_type (double_type_node, 2);

  unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
  unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
  unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
  unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);

  spu_builtin_types[SPU_BTI_QUADWORD] = intTI_type_node;

  spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];

  spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
  spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
  spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
  spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
  spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
  spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
  spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
  spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];

  spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
  spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];

  spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];

  spu_builtin_types[SPU_BTI_PTR] =
    build_pointer_type (build_qualified_type
			(void_type_node,
			 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));

  /* For each builtin we build a new prototype.  The tree code will make
     sure nodes are shared.  */
  for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
    {
      tree p;
      char name[64];		/* build_function will make a copy.  */
      int parm;

      if (d->name == 0)
	continue;

      /* Find last parm.  */
      for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
	;

      p = void_list_node;
      while (parm > 1)
	p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);

      p = build_function_type (spu_builtin_types[d->parm[0]], p);

      sprintf (name, "__builtin_%s", d->name);
      d->fndecl =
	add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
			      NULL, NULL_TREE);
      if (d->fcode == SPU_MASK_FOR_LOAD)
	TREE_READONLY (d->fndecl) = 1;

      /* These builtins don't throw.  */
      TREE_NOTHROW (d->fndecl) = 1;
    }
}
void
spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
{
  static unsigned char arr[16] =
    { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
  rtx temp = gen_reg_rtx (Pmode);
  rtx temp2 = gen_reg_rtx (V4SImode);
  rtx temp3 = gen_reg_rtx (V4SImode);
  rtx pat = gen_reg_rtx (TImode);
  rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);

  emit_move_insn (pat, array_to_constant (TImode, arr));

  /* Restore the sp.  */
  emit_move_insn (temp, op1);
  emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));

  /* Compute available stack size for sp.  */
  emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
  emit_insn (gen_shufb (temp3, temp, temp, pat));

  emit_insn (gen_addv4si3 (sp, sp, temp3));
  emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
}
int
spu_safe_dma (HOST_WIDE_INT channel)
{
  return (channel >= 21 && channel <= 27);
}
void
spu_builtin_splats (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
    {
      unsigned char arr[16];
      constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
      emit_move_insn (ops[0], array_to_constant (mode, arr));
    }
  else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
    {
      rtvec v = rtvec_alloc (4);
      RTVEC_ELT (v, 0) = ops[1];
      RTVEC_ELT (v, 1) = ops[1];
      RTVEC_ELT (v, 2) = ops[1];
      RTVEC_ELT (v, 3) = ops[1];
      emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
    }
  else
    {
      rtx reg = gen_reg_rtx (TImode);
      rtx shuf;
      if (GET_CODE (ops[1]) != REG
	  && GET_CODE (ops[1]) != SUBREG)
	ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
      switch (mode)
	{
	case V2DImode:
	case V2DFmode:
	  shuf =
	    immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
				TImode);
	  break;
	case V4SImode:
	case V4SFmode:
	  shuf =
	    immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
				TImode);
	  break;
	case V8HImode:
	  shuf =
	    immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
				TImode);
	  break;
	case V16QImode:
	  shuf =
	    immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
				TImode);
	  break;
	default:
	  abort ();
	}
      emit_move_insn (reg, shuf);
      emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
    }
}
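
/* A note on the selector constants above (illustrative): a shufb control
   byte 0x00-0x0f picks that byte of the first source, 0x10-0x1f of the
   second.  The V8HImode pattern repeats {0x02,0x03} because an HImode
   scalar lives in bytes 2-3 of its preferred slot, so each halfword of
   the result is a copy of the source element.  */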
void
spu_builtin_extract (rtx ops[])
{
  enum machine_mode mode;
  rtx rot, from, tmp;

  mode = GET_MODE (ops[1]);

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
	  break;
	case V4SFmode:
	  emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
	  break;
	case V4SImode:
	  emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
	  break;
	case V2DImode:
	  emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
	  break;
	case V2DFmode:
	  emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
	  break;
	default:
	  abort ();
	}
      return;
    }

  from = spu_gen_subreg (TImode, ops[1]);
  rot = gen_reg_rtx (TImode);
  tmp = gen_reg_rtx (SImode);

  switch (mode)
    {
    case V16QImode:
      emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
      break;
    case V8HImode:
      emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
      emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
      break;
    case V4SFmode:
    case V4SImode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
      break;
    case V2DImode:
    case V2DFmode:
      emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
      break;
    default:
      abort ();
    }

  emit_insn (gen_rotqby_ti (rot, from, tmp));

  emit_insn (gen_spu_convert (ops[0], rot));
}
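
/* Annotation: for a variable index, the switch above computes the byte
   rotate count that brings element N into the scalar preferred slot.
   E.g. for V8HImode element N starts at byte 2*N and the HImode slot is
   bytes 2-3, hence the count 2*N - 2 built from the two adds.  */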
void
spu_builtin_insert (rtx ops[])
{
  enum machine_mode mode = GET_MODE (ops[0]);
  enum machine_mode imode = GET_MODE_INNER (mode);
  rtx mask = gen_reg_rtx (TImode);
  rtx offset;

  if (GET_CODE (ops[3]) == CONST_INT)
    offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
  else
    {
      offset = gen_reg_rtx (SImode);
      emit_insn (gen_mulsi3
		 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
    }
  emit_insn (gen_cpat
	     (mask, stack_pointer_rtx, offset,
	      GEN_INT (GET_MODE_SIZE (imode))));
  emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
}
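
/* Annotation: gen_cpat emits the "generate controls for insertion"
   patterns (cbd/chd/cwd/cdd).  stack_pointer_rtx only supplies a known
   16-byte-aligned base; what matters is (base + offset) mod 16, which
   positions the shufb merge mask over element ops[3] of the result.  */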
void
spu_builtin_promote (rtx ops[])
{
  enum machine_mode mode, imode;
  rtx rot, from, offset;
  HOST_WIDE_INT pos;

  mode = GET_MODE (ops[0]);
  imode = GET_MODE_INNER (mode);

  from = gen_reg_rtx (TImode);
  rot = spu_gen_subreg (TImode, ops[0]);

  emit_insn (gen_spu_convert (from, ops[1]));

  if (GET_CODE (ops[2]) == CONST_INT)
    {
      pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
      if (GET_MODE_SIZE (imode) < 4)
	pos += 4 - GET_MODE_SIZE (imode);
      offset = GEN_INT (pos & 15);
    }
  else
    {
      offset = gen_reg_rtx (SImode);
      switch (mode)
	{
	case V16QImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
	  break;
	case V8HImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
	  emit_insn (gen_addsi3 (offset, offset, offset));
	  break;
	case V4SFmode:
	case V4SImode:
	  emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
	  emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
	  break;
	case V2DImode:
	case V2DFmode:
	  emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
	  break;
	default:
	  abort ();
	}
    }
  emit_insn (gen_rotqby_ti (rot, from, offset));
}
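
/* Annotation: promote is the inverse of extract -- the rotate counts are
   negated so the scalar moves from its preferred slot out to element
   ops[2], e.g. 2 - 2*N for V8HImode, computed above as (1 - N) doubled.  */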
void
spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
{
  rtx shuf = gen_reg_rtx (V4SImode);
  rtx insn = gen_reg_rtx (V4SImode);
  rtx shufc;
  rtx insnc;
  rtx mem;

  fnaddr = force_reg (SImode, fnaddr);
  cxt = force_reg (SImode, cxt);

  if (TARGET_LARGE_MEM)
    {
      rtx rotl = gen_reg_rtx (V4SImode);
      rtx mask = gen_reg_rtx (V4SImode);
      rtx bi = gen_reg_rtx (SImode);
      unsigned char shufa[16] = {
	2, 3, 0, 1, 18, 19, 16, 17,
	0, 1, 2, 3, 16, 17, 18, 19
      };
      unsigned char insna[16] = {
	0x41, 0, 0, 79,
	0x41, 0, 0, STATIC_CHAIN_REGNUM,
	0x60, 0x80, 0, 79,
	0x60, 0x80, 0, STATIC_CHAIN_REGNUM
      };

      shufc = force_reg (TImode, array_to_constant (TImode, shufa));
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
      emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
      emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
      emit_insn (gen_selb (insn, insnc, rotl, mask));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);

      emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
      mem = memory_address (Pmode, plus_constant (tramp, 16));
      emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
    }
  else
    {
      rtx scxt = gen_reg_rtx (SImode);
      rtx sfnaddr = gen_reg_rtx (SImode);
      unsigned char insna[16] = {
	0x42, 0, 0, STATIC_CHAIN_REGNUM,
	0x30, 0, 0, 0,
	0, 0, 0, 0,
	0, 0, 0, 0
      };

      shufc = gen_reg_rtx (TImode);
      insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));

      /* By or'ing all of cxt with the ila opcode we are assuming cxt
	 fits 18 bits and the last 4 are zeros.  This will be true if
	 the stack pointer is initialized to 0x3fff0 at program start,
	 otherwise the ila instruction will be garbage.  */

      emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
      emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
      emit_insn (gen_cpat
		 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
      emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
      emit_insn (gen_iorv4si3 (insn, insnc, shuf));

      mem = memory_address (Pmode, tramp);
      emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
    }
  emit_insn (gen_sync ());
}
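
/* Annotation (a rough sketch, not in the original): the large-memory
   trampoline assembled above is, approximately,
	ilhu $79,fnaddr@h     ilhu $STATIC_CHAIN,cxt@h
	iohl $79,fnaddr@l     iohl $STATIC_CHAIN,cxt@l
	bi   $79
   while the small-memory version is an ila $STATIC_CHAIN,cxt followed by
   an absolute branch, built by or'ing the shifted operands into the
   instruction-word templates in insna.  */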
void
spu_expand_sign_extend (rtx ops[])
{
  unsigned char arr[16];
  rtx pat = gen_reg_rtx (TImode);
  rtx sign, c;
  int i, last;

  last = GET_MODE (ops[0]) == DImode ? 7 : 15;
  if (GET_MODE (ops[1]) == QImode)
    {
      sign = gen_reg_rtx (HImode);
      emit_insn (gen_extendqihi2 (sign, ops[1]));
      for (i = 0; i < 16; i++)
	arr[i] = 0x12;
      arr[last] = 0x13;
    }
  else
    {
      for (i = 0; i < 16; i++)
	arr[i] = 0x10;
      switch (GET_MODE (ops[1]))
	{
	case HImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_extendhisi2 (sign, ops[1]));
	  arr[last] = 0x03;
	  arr[last - 1] = 0x02;
	  break;
	case SImode:
	  sign = gen_reg_rtx (SImode);
	  emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
	  for (i = 0; i < 4; i++)
	    arr[last - i] = 3 - i;
	  break;
	case DImode:
	  sign = gen_reg_rtx (SImode);
	  c = gen_reg_rtx (SImode);
	  emit_insn (gen_spu_convert (c, ops[1]));
	  emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
	  for (i = 0; i < 8; i++)
	    arr[last - i] = 7 - i;
	  break;
	default:
	  abort ();
	}
    }
  emit_move_insn (pat, array_to_constant (TImode, arr));
  emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
}
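
/* Annotation: in shufb control words, bytes 0x00-0x0f select operand A
   (ops[1]) and 0x10-0x1f select operand B (the computed sign word), so
   the patterns above tile sign bytes across the result and splice the
   original value bytes into the low end (arr[last] downward).  */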
/* Expand vector initialization.  If there are any constant parts,
   load constant parts first.  Then load any non-constant parts.  */
void
spu_expand_vector_init (rtx target, rtx vals)
{
  enum machine_mode mode = GET_MODE (target);
  int n_elts = GET_MODE_NUNITS (mode);
  int n_var = 0;
  bool all_same = true;
  rtx first, x = NULL_RTX, first_constant = NULL_RTX;
  int i;

  first = XVECEXP (vals, 0, 0);
  for (i = 0; i < n_elts; ++i)
    {
      x = XVECEXP (vals, 0, i);
      if (!CONSTANT_P (x))
	++n_var;
      else
	{
	  if (first_constant == NULL_RTX)
	    first_constant = x;
	}
      if (i > 0 && !rtx_equal_p (x, first))
	all_same = false;
    }

  /* If all elements are the same, use splats to repeat elements.  */
  if (all_same)
    {
      if (!CONSTANT_P (first)
	  && !register_operand (first, GET_MODE (x)))
	first = force_reg (GET_MODE (first), first);
      emit_insn (gen_spu_splats (target, first));
      return;
    }

  /* Load constant parts.  */
  if (n_var != n_elts)
    {
      if (n_var == 0)
	{
	  emit_move_insn (target,
			  gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
	}
      else
	{
	  rtx constant_parts_rtx = copy_rtx (vals);

	  gcc_assert (first_constant != NULL_RTX);
	  /* Fill empty slots with the first constant; this increases
	     our chance of using splats in the recursive call below.  */
	  for (i = 0; i < n_elts; ++i)
	    if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
	      XVECEXP (constant_parts_rtx, 0, i) = first_constant;

	  spu_expand_vector_init (target, constant_parts_rtx);
	}
    }

  /* Load variable parts.  */
  if (n_var != 0)
    {
      rtx insert_operands[4];

      insert_operands[0] = target;
      insert_operands[2] = target;
      for (i = 0; i < n_elts; ++i)
	{
	  x = XVECEXP (vals, 0, i);
	  if (!CONSTANT_P (x))
	    {
	      if (!register_operand (x, GET_MODE (x)))
		x = force_reg (GET_MODE (x), x);
	      insert_operands[1] = x;
	      insert_operands[3] = GEN_INT (i);
	      spu_builtin_insert (insert_operands);
	    }
	}
    }
}
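
/* Worked example (annotation): initializing a V4SImode vector with
   { 1, 2, x, 4 } first moves the constant vector { 1, 2, 1, 4 } (the
   variable slot backfilled with the first constant, which helps the
   recursive call detect splats), then inserts x into element 2.  */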
/* Return insn index for the vector compare instruction for given CODE,
   and DEST_MODE, OP_MODE.  Return -1 if valid insn is not available.  */
static int
get_vec_cmp_insn (enum rtx_code code,
		  enum machine_mode dest_mode,
		  enum machine_mode op_mode)
{
  switch (code)
    {
    case EQ:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_ceq_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_ceq_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_ceq_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_ceq_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_ceq_v2df;
      break;
    case GT:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_cgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_cgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_cgt_v4si;
      if (dest_mode == V4SImode && op_mode == V4SFmode)
	return CODE_FOR_cgt_v4sf;
      if (dest_mode == V2DImode && op_mode == V2DFmode)
	return CODE_FOR_cgt_v2df;
      break;
    case GTU:
      if (dest_mode == V16QImode && op_mode == V16QImode)
	return CODE_FOR_clgt_v16qi;
      if (dest_mode == V8HImode && op_mode == V8HImode)
	return CODE_FOR_clgt_v8hi;
      if (dest_mode == V4SImode && op_mode == V4SImode)
	return CODE_FOR_clgt_v4si;
      break;
    default:
      break;
    }
  return -1;
}
/* Emit vector compare for operands OP0 and OP1 using code RCODE.
   DMODE is expected destination mode.  This is a recursive function.  */

static rtx
spu_emit_vector_compare (enum rtx_code rcode,
			 rtx op0, rtx op1,
			 enum machine_mode dmode)
{
  int vec_cmp_insn;
  rtx mask;
  enum machine_mode dest_mode;
  enum machine_mode op_mode = GET_MODE (op1);

  gcc_assert (GET_MODE (op0) == GET_MODE (op1));

  /* Floating point vector compare instructions use destination V4SImode.
     Double floating point vector compare instructions use destination
     V2DImode.  Move destination to appropriate mode later.  */
  if (dmode == V4SFmode)
    dest_mode = V4SImode;
  else if (dmode == V2DFmode)
    dest_mode = V2DImode;
  else
    dest_mode = dmode;

  mask = gen_reg_rtx (dest_mode);
  vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

  if (vec_cmp_insn == -1)
    {
      bool swap_operands = false;
      bool try_again = false;
      switch (rcode)
	{
	case LT:
	  rcode = GT;
	  swap_operands = true;
	  try_again = true;
	  break;
	case LTU:
	  rcode = GTU;
	  swap_operands = true;
	  try_again = true;
	  break;
	case NE:
	  /* Treat A != B as ~(A==B).  */
	  {
	    enum insn_code nor_code;
	    rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
	    nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
	    gcc_assert (nor_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	case GE:
	case GEU:
	case LE:
	case LEU:
	  /* Try GT/GTU/LT/LTU OR EQ */
	  {
	    rtx c_rtx, eq_rtx;
	    enum insn_code ior_code;
	    enum rtx_code new_code;

	    switch (rcode)
	      {
	      case GE: new_code = GT; break;
	      case GEU: new_code = GTU; break;
	      case LE: new_code = LT; break;
	      case LEU: new_code = LTU; break;
	      default:
		gcc_unreachable ();
	      }

	    c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
	    eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);

	    ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
	    gcc_assert (ior_code != CODE_FOR_nothing);
	    emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
	    if (dmode != dest_mode)
	      {
		rtx temp = gen_reg_rtx (dest_mode);
		convert_move (temp, mask, 0);
		return temp;
	      }
	    return mask;
	  }
	  break;
	default:
	  gcc_unreachable ();
	}

      /* You only get two chances.  */
      if (try_again)
	vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);

      gcc_assert (vec_cmp_insn != -1);

      if (swap_operands)
	{
	  rtx tmp;
	  tmp = op0;
	  op0 = op1;
	  op1 = tmp;
	}
    }

  emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
  if (dmode != dest_mode)
    {
      rtx temp = gen_reg_rtx (dest_mode);
      convert_move (temp, mask, 0);
      return temp;
    }
  return mask;
}
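
/* Worked example (annotation): V4SImode LE has no direct pattern, so it
   is rewritten above as LT OR EQ; LT has no pattern either and is retried
   as GT with swapped operands, so "a <= b" is finally emitted as
   (b cgt a) ior (a ceq b).  */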
/* Emit vector conditional expression.
   DEST is destination.  OP1 and OP2 are two VEC_COND_EXPR operands.
   CC_OP0 and CC_OP1 are the two operands for the relation operation COND.  */

static rtx
spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
			   rtx cond, rtx cc_op0, rtx cc_op1)
{
  enum machine_mode dest_mode = GET_MODE (dest);
  enum rtx_code rcode = GET_CODE (cond);
  rtx mask;

  /* Get the vector mask for the given relational operations.  */
  mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);

  emit_insn (gen_selb (dest, op2, op1, mask));

  return mask;
}
static rtx
spu_force_reg (enum machine_mode mode, rtx op)
{
  rtx x, r;
  if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
    {
      if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
	  || GET_MODE (op) == BLKmode)
	return force_reg (mode, convert_to_mode (mode, op, 0));
      abort ();
    }

  r = force_reg (GET_MODE (op), op);
  if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
    {
      x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
      if (x)
	return x;
    }

  x = gen_reg_rtx (mode);
  emit_insn (gen_spu_convert (x, r));
  return x;
}
static void
spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
{
  HOST_WIDE_INT v = 0;
  int lsbits;

  /* Check the range of immediate operands.  */
  if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
    {
      int range = p - SPU_BTI_7;

      if (!CONSTANT_P (op))
	error ("%s expects an integer literal in the range [%d, %d].",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high);

      if (GET_CODE (op) == CONST
	  && (GET_CODE (XEXP (op, 0)) == PLUS
	      || GET_CODE (XEXP (op, 0)) == MINUS))
	{
	  v = INTVAL (XEXP (XEXP (op, 0), 1));
	  op = XEXP (XEXP (op, 0), 0);
	}
      else if (GET_CODE (op) == CONST_INT)
	v = INTVAL (op);
      else if (GET_CODE (op) == CONST_VECTOR
	       && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
	v = INTVAL (CONST_VECTOR_ELT (op, 0));

      /* The default for v is 0 which is valid in every range.  */
      if (v < spu_builtin_range[range].low
	  || v > spu_builtin_range[range].high)
	error ("%s expects an integer literal in the range [%d, %d]. ("
	       HOST_WIDE_INT_PRINT_DEC ")",
	       d->name,
	       spu_builtin_range[range].low, spu_builtin_range[range].high,
	       v);

      switch (p)
	{
	case SPU_BTI_S10_4:
	  lsbits = 4;
	  break;
	case SPU_BTI_U16_2:
	  /* This is only used in lqa, and stqa.  Even though the insns
	     encode 16 bits of the address (all but the 2 least
	     significant), only 14 bits are used because it is masked to
	     be 16 byte aligned.  */
	  lsbits = 4;
	  break;
	case SPU_BTI_S16_2:
	  /* This is used for lqr and stqr.  */
	  lsbits = 2;
	  break;
	default:
	  lsbits = 0;
	}

      if (GET_CODE (op) == LABEL_REF
	  || (GET_CODE (op) == SYMBOL_REF
	      && SYMBOL_REF_FUNCTION_P (op))
	  || (v & ((1 << lsbits) - 1)) != 0)
	warning (0, "%d least significant bits of %s are ignored.", lsbits,
		 d->name);
    }
}
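
/* Annotation: e.g. si_ai() takes an SPU_BTI_S10 immediate, so passing 600
   triggers the range error above with bounds [-512, 511] taken from
   spu_builtin_range[], which is indexed by p - SPU_BTI_7.  */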
static void
expand_builtin_args (struct spu_builtin_description *d, tree exp,
		     rtx target, rtx ops[])
{
  enum insn_code icode = d->icode;
  int i = 0, a;

  /* Expand the arguments into rtl.  */

  if (d->parm[0] != SPU_BTI_VOID)
    ops[i++] = target;

  for (a = 0; i < insn_data[icode].n_operands; i++, a++)
    {
      tree arg = CALL_EXPR_ARG (exp, a);
      if (arg == 0)
	abort ();
      ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
    }
}
static rtx
spu_expand_builtin_1 (struct spu_builtin_description *d,
		      tree exp, rtx target)
{
  rtx pat;
  rtx ops[8];
  enum insn_code icode = d->icode;
  enum machine_mode mode, tmode;
  int i, p;
  tree return_type;

  /* Set up ops[] with values from arglist.  */
  expand_builtin_args (d, exp, target, ops);

  /* Handle the target operand which must be operand 0.  */
  i = 0;
  if (d->parm[0] != SPU_BTI_VOID)
    {

      /* We prefer the mode specified for the match_operand otherwise
         use the mode from the builtin function prototype.  */
      tmode = insn_data[d->icode].operand[0].mode;
      if (tmode == VOIDmode)
	tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);

      /* Try to use target because not using it can lead to extra copies
         and when we are using all of the registers extra copies leads
         to extra spills.  */
      if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
	ops[0] = target;
      else
	target = ops[0] = gen_reg_rtx (tmode);

      if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
	abort ();

      i++;
    }

  if (d->fcode == SPU_MASK_FOR_LOAD)
    {
      enum machine_mode mode = insn_data[icode].operand[1].mode;
      tree arg;
      rtx addr, op, pat;

      /* get addr */
      arg = CALL_EXPR_ARG (exp, 0);
      gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
      op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
      addr = memory_address (mode, op);

      /* negate addr */
      op = gen_reg_rtx (GET_MODE (addr));
      emit_insn (gen_rtx_SET (VOIDmode, op,
			      gen_rtx_NEG (GET_MODE (addr), addr)));
      op = gen_rtx_MEM (mode, op);

      pat = GEN_FCN (icode) (target, op);
      if (!pat)
	return 0;
      emit_insn (pat);
      return target;
    }

  /* Ignore align_hint, but still expand its args in case they have
     side effects.  */
  if (icode == CODE_FOR_spu_align_hint)
    return 0;

  /* Handle the rest of the operands.  */
  for (p = 1; i < insn_data[icode].n_operands; i++, p++)
    {
      if (insn_data[d->icode].operand[i].mode != VOIDmode)
	mode = insn_data[d->icode].operand[i].mode;
      else
	mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);

      /* mode can be VOIDmode here for labels.  */

      /* For specific intrinsics with an immediate operand, e.g.,
         si_ai(), we sometimes need to convert the scalar argument to a
         vector argument by splatting the scalar.  */
      if (VECTOR_MODE_P (mode)
	  && (GET_CODE (ops[i]) == CONST_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
	      || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT)
	  && d->parm[i] != SPU_BTI_QUADWORD)
	{
	  if (GET_CODE (ops[i]) == CONST_INT)
	    ops[i] = spu_const (mode, INTVAL (ops[i]));
	  else
	    {
	      rtx reg = gen_reg_rtx (mode);
	      enum machine_mode imode = GET_MODE_INNER (mode);
	      if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
		ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
	      if (imode != GET_MODE (ops[i]))
		ops[i] = convert_to_mode (imode, ops[i],
					  TYPE_UNSIGNED (spu_builtin_types
							 [d->parm[i]]));
	      emit_insn (gen_spu_splats (reg, ops[i]));
	      ops[i] = reg;
	    }
	}

      spu_check_builtin_parm (d, ops[i], d->parm[p]);

      if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
	ops[i] = spu_force_reg (mode, ops[i]);
    }

  switch (insn_data[icode].n_operands)
    {
    case 0:
      pat = GEN_FCN (icode) (0);
      break;
    case 1:
      pat = GEN_FCN (icode) (ops[0]);
      break;
    case 2:
      pat = GEN_FCN (icode) (ops[0], ops[1]);
      break;
    case 3:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
      break;
    case 4:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
      break;
    case 5:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
      break;
    case 6:
      pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
      break;
    default:
      abort ();
    }

  if (!pat)
    abort ();

  if (d->type == B_CALL || d->type == B_BISLED)
    emit_call_insn (pat);
  else if (d->type == B_JUMP)
    {
      emit_jump_insn (pat);
      emit_barrier ();
    }
  else
    emit_insn (pat);

  return_type = spu_builtin_types[d->parm[0]];
  if (d->parm[0] != SPU_BTI_VOID
      && GET_MODE (target) != TYPE_MODE (return_type))
    {
      /* target is the return value.  It should always be the mode of
         the builtin function prototype.  */
      target = spu_force_reg (TYPE_MODE (return_type), target);
    }

  return target;
}
rtx
spu_expand_builtin (tree exp,
		    rtx target,
		    rtx subtarget ATTRIBUTE_UNUSED,
		    enum machine_mode mode ATTRIBUTE_UNUSED,
		    int ignore ATTRIBUTE_UNUSED)
{
  tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
  unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
  struct spu_builtin_description *d;

  if (fcode < NUM_SPU_BUILTINS)
    {
      d = &spu_builtins[fcode];

      return spu_expand_builtin_1 (d, exp, target);
    }
  abort ();
}
/* Implement targetm.vectorize.builtin_mul_widen_even.  */
static tree
spu_builtin_mul_widen_even (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULE_0].fndecl;
      else
	return spu_builtins[SPU_MULE_1].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}
/* Implement targetm.vectorize.builtin_mul_widen_odd.  */
static tree
spu_builtin_mul_widen_odd (tree type)
{
  switch (TYPE_MODE (type))
    {
    case V8HImode:
      if (TYPE_UNSIGNED (type))
	return spu_builtins[SPU_MULO_1].fndecl;
      else
	return spu_builtins[SPU_MULO_0].fndecl;
      break;
    default:
      return NULL_TREE;
    }
}
/* Implement targetm.vectorize.builtin_mask_for_load.  */
static tree
spu_builtin_mask_for_load (void)
{
  struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
  gcc_assert (d);
  return d->fndecl;
}
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.  */
  if (runtime_test)
    return -19;

  return 0;
}
/* Return true iff a data reference of TYPE can reach vector alignment (16)
   after applying N number of iterations.  This routine does not determine
   how many iterations are required to reach desired alignment.  */

static bool
spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
{
  if (is_packed)
    return false;

  /* All other types are naturally aligned.  */
  return true;
}
/* Count the total number of instructions in each pipe and return the
   maximum, which is used as the Minimum Iteration Interval (MII)
   in the modulo scheduler.  get_pipe() will return -2, -1, 0, or 1.
   -2 are instructions that can go in pipe0 or pipe1.  */
static int
spu_sms_res_mii (struct ddg *g)
{
  int i;
  unsigned t[4] = {0, 0, 0, 0};

  for (i = 0; i < g->num_nodes; i++)
    {
      rtx insn = g->nodes[i].insn;
      int p = get_pipe (insn) + 2;

      gcc_assert (p >= 0);
      gcc_assert (p < 4);

      t[p]++;
      if (dump_file && INSN_P (insn))
	fprintf (dump_file, "i%d %s %d %d\n",
		 INSN_UID (insn),
		 insn_data[INSN_CODE (insn)].name,
		 p, t[p]);
    }
  if (dump_file)
    fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);

  return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
}
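
/* Worked example (annotation): with t[0] = 4 dual-pipe insns, t[2] = 3
   pipe0 insns and t[3] = 2 pipe1 insns, the result is
   MAX ((4 + 3 + 2 + 1) / 2, MAX (3, 2)) = 5: dual-pipe insns can split
   across both pipes, so the rounded-up total of issue slots bounds MII.  */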
void
spu_init_expanders (void)
{
  /* HARD_FRAME_REGISTER is only 128 bit aligned when
     frame_pointer_needed is true.  We don't know that until we're
     expanding the prologue.  */
  if (cfun)
    REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
}
static enum machine_mode
spu_libgcc_cmp_return_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for compare returns.  */
  return SImode;
}

static enum machine_mode
spu_libgcc_shift_count_mode (void)
{
  /* For SPU word mode is TI mode so it is better to use SImode
     for shift counts.  */