/* Copyright (C) 2006, 2007 Free Software Foundation, Inc.

   This file is free software; you can redistribute it and/or modify it under
   the terms of the GNU General Public License as published by the Free
   Software Foundation; either version 3 of the License, or (at your option)
   any later version.

   This file is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
   FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
   for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */
19 #include "coretypes.h"
23 #include "hard-reg-set.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
37 #include "basic-block.h"
38 #include "integrate.h"
44 #include "target-def.h"
45 #include "langhooks.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
53 #include "tree-gimple.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
/* Table of {low, high} value ranges for constant operands of SPU
   builtins; each row corresponds to the SPU_BTI_* immediate-operand
   class named in its trailing comment.
   NOTE(review): this chunk is an elided extraction -- the struct's
   member list (original lines 60-63) and the array's closing brace
   are missing here; verify against the full source before building.  */
64 static struct spu_builtin_range spu_builtin_range
[] = {
65 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
66 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
68 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
69 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
71 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
72 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
73 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
80 /* Target specific attribute specifications. */
/* One flag per hard register.  NOTE(review): only the declaration is
   visible in this chunk -- where/how the flags are set is not shown
   here; confirm against the full source.  */
81 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode
);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode
);
87 static rtx
adjust_operand (rtx op
, HOST_WIDE_INT
* start
);
88 static rtx
get_pic_reg (void);
89 static int need_to_save_reg (int regno
, int saving
);
90 static rtx
frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
);
91 static rtx
frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
);
92 static rtx
frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
,
94 static void emit_nop_for_insn (rtx insn
);
95 static bool insn_clobbers_hbr (rtx insn
);
96 static void spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
98 static rtx
spu_emit_vector_compare (enum rtx_code rcode
, rtx op0
, rtx op1
,
99 enum machine_mode dmode
);
100 static rtx
get_branch_target (rtx branch
);
101 static void insert_branch_hints (void);
102 static void insert_nops (void);
103 static void spu_machine_dependent_reorg (void);
104 static int spu_sched_issue_rate (void);
105 static int spu_sched_variable_issue (FILE * dump
, int verbose
, rtx insn
,
107 static int get_pipe (rtx insn
);
108 static int spu_sched_adjust_priority (rtx insn
, int pri
);
109 static int spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
);
110 static tree
spu_handle_fndecl_attribute (tree
* node
, tree name
, tree args
,
112 unsigned char *no_add_attrs
);
113 static tree
spu_handle_vector_attribute (tree
* node
, tree name
, tree args
,
115 unsigned char *no_add_attrs
);
116 static int spu_naked_function_p (tree func
);
117 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS
*cum
, enum machine_mode mode
,
118 const_tree type
, unsigned char named
);
119 static tree
spu_build_builtin_va_list (void);
120 static tree
spu_gimplify_va_arg_expr (tree valist
, tree type
, tree
* pre_p
,
122 static int regno_aligned_for_load (int regno
);
123 static int store_with_one_insn_p (rtx mem
);
124 static int reg_align (rtx reg
);
125 static int mem_is_padded_component_ref (rtx x
);
126 static bool spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
);
127 static void spu_asm_globalize_label (FILE * file
, const char *name
);
128 static unsigned char spu_rtx_costs (rtx x
, int code
, int outer_code
,
130 static unsigned char spu_function_ok_for_sibcall (tree decl
, tree exp
);
131 static void spu_init_libfuncs (void);
132 static bool spu_return_in_memory (const_tree type
, const_tree fntype
);
133 static void fix_range (const char *);
134 static void spu_encode_section_info (tree
, rtx
, int);
135 static tree
spu_builtin_mul_widen_even (tree
);
136 static tree
spu_builtin_mul_widen_odd (tree
);
137 static tree
spu_builtin_mask_for_load (void);
138 static int spu_builtin_vectorization_cost (bool);
139 static bool spu_vector_alignment_reachable (const_tree
, bool);
140 static int spu_sms_res_mii (struct ddg
*g
);
142 extern const char *reg_names
[];
143 rtx spu_compare_op0
, spu_compare_op1
;
145 /* Which instruction set architecture to use. */
147 /* Which cpu are we tuning for. */
163 IC_POOL
, /* constant pool */
164 IC_IL1
, /* one il* instruction */
165 IC_IL2
, /* both ilhu and iohl instructions */
166 IC_IL1s
, /* one il* instruction */
167 IC_IL2s
, /* both ilhu and iohl instructions */
168 IC_FSMBI
, /* the fsmbi instruction */
169 IC_CPAT
, /* one of the c*d instructions */
170 IC_FSMBI2
/* fsmbi plus 1 other instruction */
173 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
174 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
175 static int cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
);
176 static enum immediate_class
classify_immediate (rtx op
,
177 enum machine_mode mode
);
179 static enum machine_mode
180 spu_libgcc_cmp_return_mode (void);
182 static enum machine_mode
183 spu_libgcc_shift_count_mode (void);
185 /* Built in types. */
186 tree spu_builtin_types
[SPU_BTI_MAX
];
188 /* TARGET overrides. */
/* Hook the SPU-specific implementations into the target vector.  Each
   pair undefines the generic default and substitutes the SPU routine
   declared earlier in this file.  */
190 #undef TARGET_INIT_BUILTINS
191 #define TARGET_INIT_BUILTINS spu_init_builtins
193 #undef TARGET_EXPAND_BUILTIN
194 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
196 #undef TARGET_EH_RETURN_FILTER_MODE
197 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
/* NOTE(review): the comment below is unterminated in this elided
   chunk -- its closing line (original line 200) is missing.  */
199 /* The .8byte directive doesn't seem to work well for a 32 bit
201 #undef TARGET_ASM_UNALIGNED_DI_OP
202 #define TARGET_ASM_UNALIGNED_DI_OP NULL
/* Cost estimation hooks used by the RTL optimizers.  */
204 #undef TARGET_RTX_COSTS
205 #define TARGET_RTX_COSTS spu_rtx_costs
207 #undef TARGET_ADDRESS_COST
208 #define TARGET_ADDRESS_COST hook_int_rtx_0
/* Instruction scheduler hooks.  */
210 #undef TARGET_SCHED_ISSUE_RATE
211 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
213 #undef TARGET_SCHED_VARIABLE_ISSUE
214 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
216 #undef TARGET_SCHED_ADJUST_PRIORITY
217 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
219 #undef TARGET_SCHED_ADJUST_COST
220 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
/* Forward declaration; the table itself is defined later in this file.  */
222 const struct attribute_spec spu_attribute_table
[];
223 #undef TARGET_ATTRIBUTE_TABLE
224 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
226 #undef TARGET_ASM_INTEGER
227 #define TARGET_ASM_INTEGER spu_assemble_integer
229 #undef TARGET_SCALAR_MODE_SUPPORTED_P
230 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
232 #undef TARGET_VECTOR_MODE_SUPPORTED_P
233 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
235 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
236 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
238 #undef TARGET_ASM_GLOBALIZE_LABEL
239 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
/* Calling convention / varargs hooks.  */
241 #undef TARGET_PASS_BY_REFERENCE
242 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
244 #undef TARGET_MUST_PASS_IN_STACK
245 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
247 #undef TARGET_BUILD_BUILTIN_VA_LIST
248 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
250 #undef TARGET_SETUP_INCOMING_VARARGS
251 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
253 #undef TARGET_MACHINE_DEPENDENT_REORG
254 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
256 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
257 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
259 #undef TARGET_DEFAULT_TARGET_FLAGS
260 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
262 #undef TARGET_INIT_LIBFUNCS
263 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
265 #undef TARGET_RETURN_IN_MEMORY
266 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
268 #undef TARGET_ENCODE_SECTION_INFO
269 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
/* Auto-vectorizer hooks.  */
271 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
272 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
274 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
275 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
277 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
278 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
280 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
281 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
283 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
284 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
286 #undef TARGET_LIBGCC_CMP_RETURN_MODE
287 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
289 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
290 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
292 #undef TARGET_SCHED_SMS_RES_MII
293 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
/* The single target-vector instance, built from the macros above.  */
295 struct gcc_target targetm
= TARGET_INITIALIZER
;
/* Implements OPTIMIZATION_OPTIONS: adjust default --param values for
   the SPU's large register file.  LEVEL and SIZE are unused.
   NOTE(review): the return type line and the function's braces are
   missing from this elided chunk.  */
298 spu_optimization_options (int level ATTRIBUTE_UNUSED
, int size ATTRIBUTE_UNUSED
)
300 /* Override some of the default param values. With so many registers
301 larger values are better for these params. */
302 MAX_PENDING_LIST_LENGTH
= 128;
304 /* With so many registers this is better on by default. */
305 flag_rename_registers
= 1;
308 /* Sometimes certain combinations of command options do not make sense
309 on a particular target machine. You can define a macro
310 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
311 executed once just after all the command options have been parsed. */
/* NOTE(review): return type and braces are missing from this elided
   chunk, as are several guard/body lines noted below.  */
313 spu_override_options (void)
315 /* Small loops will be unpeeled at -O3. For SPU it is more important
316 to keep code small by default. */
317 if (!flag_unroll_loops
&& !flag_peel_loops
318 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES
))
319 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES
) = 1;
/* The frame pointer is never needed on the SPU.  */
321 flag_omit_frame_pointer
= 1;
/* NOTE(review): the body of this if (original lines 324-325) is
   missing from this chunk -- presumably it raises align_functions;
   confirm against the full source.  */
323 if (align_functions
< 8)
/* Apply any -mfixed-range= register reservations.  */
326 if (spu_fixed_range_string
)
327 fix_range (spu_fixed_range_string
);
329 /* Determine processor architectural level. */
/* NOTE(review): the guard checking whether -march= was given
   (original lines ~330-331) is missing from this chunk.  */
332 if (strcmp (&spu_arch_string
[0], "cell") == 0)
333 spu_arch
= PROCESSOR_CELL
;
334 else if (strcmp (&spu_arch_string
[0], "celledp") == 0)
335 spu_arch
= PROCESSOR_CELLEDP
;
/* Reject any -march= value other than cell/celledp.  */
337 error ("Unknown architecture '%s'", &spu_arch_string
[0]);
340 /* Determine processor to tune for. */
/* NOTE(review): the guard checking whether -mtune= was given
   (original lines ~341-342) is missing from this chunk.  */
343 if (strcmp (&spu_tune_string
[0], "cell") == 0)
344 spu_tune
= PROCESSOR_CELL
;
345 else if (strcmp (&spu_tune_string
[0], "celledp") == 0)
346 spu_tune
= PROCESSOR_CELLEDP
;
348 error ("Unknown architecture '%s'", &spu_tune_string
[0]);
352 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
353 struct attribute_spec.handler. */
355 /* Table of machine attributes. */
/* Rows follow the attribute_spec layout shown in the comment below;
   the all-NULL row terminates the table.
   NOTE(review): the initializer's opening and closing braces
   (original lines 357 and 362-363) are missing from this elided
   chunk.  */
356 const struct attribute_spec spu_attribute_table
[] =
358 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
359 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
},
360 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
},
361 { NULL
, 0, 0, false, false, false, NULL
}
364 /* True if MODE is valid for the target. By "valid", we mean able to
365 be manipulated in non-trivial ways. In particular, this means all
366 the arithmetic is supported. */
368 spu_scalar_mode_supported_p (enum machine_mode mode
)
386 /* Similarly for vector modes. "Supported" here is less strict. At
387 least some operations are supported; need to check optabs or builtins
388 for further details. */
390 spu_vector_mode_supported_p (enum machine_mode mode
)
407 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
408 least significant bytes of the outer mode. This function returns
409 TRUE for the SUBREG's where this is correct. */
/* OP is a SUBREG.  Nonzero when both modes are non-void and either
   the inner and outer modes have equal size, or both fit within a
   single 32-bit (4-byte) slot.
   NOTE(review): the return-type line and the function braces are
   missing from this elided chunk.  */
411 valid_subreg (rtx op
)
413 enum machine_mode om
= GET_MODE (op
);
414 enum machine_mode im
= GET_MODE (SUBREG_REG (op
));
415 return om
!= VOIDmode
&& im
!= VOIDmode
416 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
417 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4));
420 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
421 and adjust the start offset. */
423 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
425 enum machine_mode mode
;
427 /* Strip any SUBREG */
428 if (GET_CODE (op
) == SUBREG
)
432 GET_MODE_BITSIZE (GET_MODE (op
)) -
433 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
434 op
= SUBREG_REG (op
);
436 /* If it is smaller than SI, assure a SUBREG */
437 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
441 *start
+= 32 - op_size
;
444 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
445 mode
= mode_for_size (op_size
, MODE_INT
, 0);
446 if (mode
!= GET_MODE (op
))
447 op
= gen_rtx_SUBREG (mode
, op
, 0);
452 spu_expand_extv (rtx ops
[], int unsignedp
)
454 HOST_WIDE_INT width
= INTVAL (ops
[2]);
455 HOST_WIDE_INT start
= INTVAL (ops
[3]);
456 HOST_WIDE_INT src_size
, dst_size
;
457 enum machine_mode src_mode
, dst_mode
;
458 rtx dst
= ops
[0], src
= ops
[1];
461 dst
= adjust_operand (ops
[0], 0);
462 dst_mode
= GET_MODE (dst
);
463 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
465 src
= adjust_operand (src
, &start
);
466 src_mode
= GET_MODE (src
);
467 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
471 s
= gen_reg_rtx (src_mode
);
475 emit_insn (gen_ashlsi3 (s
, src
, GEN_INT (start
)));
478 emit_insn (gen_ashldi3 (s
, src
, GEN_INT (start
)));
481 emit_insn (gen_ashlti3 (s
, src
, GEN_INT (start
)));
489 if (width
< src_size
)
496 icode
= unsignedp
? CODE_FOR_lshrsi3
: CODE_FOR_ashrsi3
;
499 icode
= unsignedp
? CODE_FOR_lshrdi3
: CODE_FOR_ashrdi3
;
502 icode
= unsignedp
? CODE_FOR_lshrti3
: CODE_FOR_ashrti3
;
507 s
= gen_reg_rtx (src_mode
);
508 pat
= GEN_FCN (icode
) (s
, src
, GEN_INT (src_size
- width
));
513 convert_move (dst
, src
, unsignedp
);
517 spu_expand_insv (rtx ops
[])
519 HOST_WIDE_INT width
= INTVAL (ops
[1]);
520 HOST_WIDE_INT start
= INTVAL (ops
[2]);
521 HOST_WIDE_INT maskbits
;
522 enum machine_mode dst_mode
, src_mode
;
523 rtx dst
= ops
[0], src
= ops
[3];
524 int dst_size
, src_size
;
530 if (GET_CODE (ops
[0]) == MEM
)
531 dst
= gen_reg_rtx (TImode
);
533 dst
= adjust_operand (dst
, &start
);
534 dst_mode
= GET_MODE (dst
);
535 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
537 if (CONSTANT_P (src
))
539 enum machine_mode m
=
540 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
541 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
543 src
= adjust_operand (src
, 0);
544 src_mode
= GET_MODE (src
);
545 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
547 mask
= gen_reg_rtx (dst_mode
);
548 shift_reg
= gen_reg_rtx (dst_mode
);
549 shift
= dst_size
- start
- width
;
551 /* It's not safe to use subreg here because the compiler assumes
552 that the SUBREG_REG is right justified in the SUBREG. */
553 convert_move (shift_reg
, src
, 1);
560 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
563 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
566 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
578 maskbits
= (-1ll << (32 - width
- start
));
580 maskbits
+= (1ll << (32 - start
));
581 emit_move_insn (mask
, GEN_INT (maskbits
));
584 maskbits
= (-1ll << (64 - width
- start
));
586 maskbits
+= (1ll << (64 - start
));
587 emit_move_insn (mask
, GEN_INT (maskbits
));
591 unsigned char arr
[16];
593 memset (arr
, 0, sizeof (arr
));
594 arr
[i
] = 0xff >> (start
& 7);
595 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
597 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
598 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
604 if (GET_CODE (ops
[0]) == MEM
)
606 rtx aligned
= gen_reg_rtx (SImode
);
607 rtx low
= gen_reg_rtx (SImode
);
608 rtx addr
= gen_reg_rtx (SImode
);
609 rtx rotl
= gen_reg_rtx (SImode
);
610 rtx mask0
= gen_reg_rtx (TImode
);
613 emit_move_insn (addr
, XEXP (ops
[0], 0));
614 emit_insn (gen_andsi3 (aligned
, addr
, GEN_INT (-16)));
615 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
616 emit_insn (gen_negsi2 (rotl
, low
));
617 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
618 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
619 mem
= change_address (ops
[0], TImode
, aligned
);
620 set_mem_alias_set (mem
, 0);
621 emit_move_insn (dst
, mem
);
622 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
623 emit_move_insn (mem
, dst
);
624 if (start
+ width
> MEM_ALIGN (ops
[0]))
626 rtx shl
= gen_reg_rtx (SImode
);
627 rtx mask1
= gen_reg_rtx (TImode
);
628 rtx dst1
= gen_reg_rtx (TImode
);
630 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
631 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
632 mem1
= adjust_address (mem
, TImode
, 16);
633 set_mem_alias_set (mem1
, 0);
634 emit_move_insn (dst1
, mem1
);
635 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
636 emit_move_insn (mem1
, dst1
);
640 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask
));
645 spu_expand_block_move (rtx ops
[])
647 HOST_WIDE_INT bytes
, align
, offset
;
648 rtx src
, dst
, sreg
, dreg
, target
;
650 if (GET_CODE (ops
[2]) != CONST_INT
651 || GET_CODE (ops
[3]) != CONST_INT
652 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO
* 8))
655 bytes
= INTVAL (ops
[2]);
656 align
= INTVAL (ops
[3]);
666 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
668 dst
= adjust_address (ops
[0], V16QImode
, offset
);
669 src
= adjust_address (ops
[1], V16QImode
, offset
);
670 emit_move_insn (dst
, src
);
675 unsigned char arr
[16] = { 0 };
676 for (i
= 0; i
< bytes
- offset
; i
++)
678 dst
= adjust_address (ops
[0], V16QImode
, offset
);
679 src
= adjust_address (ops
[1], V16QImode
, offset
);
680 mask
= gen_reg_rtx (V16QImode
);
681 sreg
= gen_reg_rtx (V16QImode
);
682 dreg
= gen_reg_rtx (V16QImode
);
683 target
= gen_reg_rtx (V16QImode
);
684 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
685 emit_move_insn (dreg
, dst
);
686 emit_move_insn (sreg
, src
);
687 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
688 emit_move_insn (dst
, target
);
/* NOTE(review): this initializer (original line 696) belongs to a
   declaration whose opening line is missing from this elided chunk --
   from the SPU_EQ/SPU_GT/SPU_GTU constants it appears to define the
   spu_comp_code enumeration/order used to index the table below;
   confirm against the full source.  */
696 { SPU_EQ
, SPU_GT
, SPU_GTU
};
/* Compare-instruction codes, indexed by operand mode (rows: QI, HI,
   SI, DI, TI, SF, DF, V16QI, V8HI, V4SI, V4SF, V2DF) and by compare
   kind (columns: eq, gt, gtu in the order above).  A 0 entry means no
   such compare exists -- e.g. no unsigned compare for float modes.
   NOTE(review): the table's closing brace (original line 711) is
   missing from this elided chunk.  */
698 int spu_comp_icode
[12][3] = {
699 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
700 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
701 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
702 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
703 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
704 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
705 {CODE_FOR_ceq_df
, CODE_FOR_cgt_df
, 0},
706 {CODE_FOR_ceq_v16qi
, CODE_FOR_cgt_v16qi
, CODE_FOR_clgt_v16qi
},
707 {CODE_FOR_ceq_v8hi
, CODE_FOR_cgt_v8hi
, CODE_FOR_clgt_v8hi
},
708 {CODE_FOR_ceq_v4si
, CODE_FOR_cgt_v4si
, CODE_FOR_clgt_v4si
},
709 {CODE_FOR_ceq_v4sf
, CODE_FOR_cgt_v4sf
, 0},
710 {CODE_FOR_ceq_v2df
, CODE_FOR_cgt_v2df
, 0},
713 /* Generate a compare for CODE. Return a brand-new rtx that represents
714 the result of the compare. GCC can figure this out too if we don't
715 provide all variations of compares, but GCC always wants to use
716 WORD_MODE, we can generate better code in most cases if we do it
719 spu_emit_branch_or_set (int is_set
, enum rtx_code code
, rtx operands
[])
721 int reverse_compare
= 0;
722 int reverse_test
= 0;
723 rtx compare_result
, eq_result
;
724 rtx comp_rtx
, eq_rtx
;
725 rtx target
= operands
[0];
726 enum machine_mode comp_mode
;
727 enum machine_mode op_mode
;
728 enum spu_comp_code scode
, eq_code
, ior_code
;
732 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
733 and so on, to keep the constant in operand 1. */
734 if (GET_CODE (spu_compare_op1
) == CONST_INT
)
736 HOST_WIDE_INT val
= INTVAL (spu_compare_op1
) - 1;
737 if (trunc_int_for_mode (val
, GET_MODE (spu_compare_op0
)) == val
)
741 spu_compare_op1
= GEN_INT (val
);
745 spu_compare_op1
= GEN_INT (val
);
749 spu_compare_op1
= GEN_INT (val
);
753 spu_compare_op1
= GEN_INT (val
);
762 op_mode
= GET_MODE (spu_compare_op0
);
768 if (HONOR_NANS (op_mode
) && spu_arch
== PROCESSOR_CELLEDP
)
783 if (HONOR_NANS (op_mode
) && spu_arch
== PROCESSOR_CELLEDP
)
875 comp_mode
= V4SImode
;
879 comp_mode
= V2DImode
;
886 if (GET_MODE (spu_compare_op1
) == DFmode
)
888 rtx reg
= gen_reg_rtx (DFmode
);
889 if ((!flag_unsafe_math_optimizations
&& spu_arch
== PROCESSOR_CELL
)
890 || (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
892 if (spu_arch
== PROCESSOR_CELL
)
895 emit_insn (gen_subdf3 (reg
, spu_compare_op1
, spu_compare_op0
));
897 emit_insn (gen_subdf3 (reg
, spu_compare_op0
, spu_compare_op1
));
899 spu_compare_op0
= reg
;
900 spu_compare_op1
= CONST0_RTX (DFmode
);
904 if (is_set
== 0 && spu_compare_op1
== const0_rtx
905 && (GET_MODE (spu_compare_op0
) == SImode
906 || GET_MODE (spu_compare_op0
) == HImode
) && scode
== SPU_EQ
)
908 /* Don't need to set a register with the result when we are
909 comparing against zero and branching. */
910 reverse_test
= !reverse_test
;
911 compare_result
= spu_compare_op0
;
915 compare_result
= gen_reg_rtx (comp_mode
);
919 rtx t
= spu_compare_op1
;
920 spu_compare_op1
= spu_compare_op0
;
924 if (spu_comp_icode
[index
][scode
] == 0)
927 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
928 (spu_compare_op0
, op_mode
))
929 spu_compare_op0
= force_reg (op_mode
, spu_compare_op0
);
930 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
931 (spu_compare_op1
, op_mode
))
932 spu_compare_op1
= force_reg (op_mode
, spu_compare_op1
);
933 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
938 emit_insn (comp_rtx
);
942 eq_result
= gen_reg_rtx (comp_mode
);
943 eq_rtx
= GEN_FCN (spu_comp_icode
[index
][eq_code
]) (eq_result
,
949 ior_code
= ior_optab
->handlers
[(int)comp_mode
].insn_code
;
950 gcc_assert (ior_code
!= CODE_FOR_nothing
);
951 emit_insn (GEN_FCN (ior_code
)
952 (compare_result
, compare_result
, eq_result
));
961 /* We don't have branch on QI compare insns, so we convert the
962 QI compare result to a HI result. */
963 if (comp_mode
== QImode
)
965 rtx old_res
= compare_result
;
966 compare_result
= gen_reg_rtx (HImode
);
968 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
972 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
974 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
976 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, target
);
977 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
978 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
981 else if (is_set
== 2)
983 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
984 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
985 enum machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
987 rtx op_t
= operands
[2];
988 rtx op_f
= operands
[3];
990 /* The result of the comparison can be SI, HI or QI mode. Create a
991 mask based on that result. */
992 if (target_size
> compare_size
)
994 select_mask
= gen_reg_rtx (mode
);
995 emit_insn (gen_extend_compare (select_mask
, compare_result
));
997 else if (target_size
< compare_size
)
999 gen_rtx_SUBREG (mode
, compare_result
,
1000 (compare_size
- target_size
) / BITS_PER_UNIT
);
1001 else if (comp_mode
!= mode
)
1002 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
1004 select_mask
= compare_result
;
1006 if (GET_MODE (target
) != GET_MODE (op_t
)
1007 || GET_MODE (target
) != GET_MODE (op_f
))
1011 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
1013 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
1018 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
1019 gen_rtx_NOT (comp_mode
, compare_result
)));
1020 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
1021 emit_insn (gen_extendhisi2 (target
, compare_result
));
1022 else if (GET_MODE (target
) == SImode
1023 && GET_MODE (compare_result
) == QImode
)
1024 emit_insn (gen_extend_compare (target
, compare_result
));
1026 emit_move_insn (target
, compare_result
);
/* Extract the target bit-image of CONST_DOUBLE X as a HOST_WIDE_INT:
   the 32-bit single-precision image for SFmode, the full 64-bit
   double-precision image for DFmode.
   NOTE(review): the return-type line, braces, the declarations of
   `rv', `val' and `l', and the default/return path are missing from
   this elided chunk; verify against the full source.  */
1031 const_double_to_hwint (rtx x
)
1035 if (GET_MODE (x
) == SFmode
)
1037 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1038 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
1040 else if (GET_MODE (x
) == DFmode
)
1043 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
1044 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
/* Pack the two 32-bit target words into one 64-bit value
   (high word first; NOTE(review): the assignment of the high word,
   original line 1045, is missing from this chunk).  */
1046 val
= (val
<< 32) | (l
[1] & 0xffffffff);
/* Build a CONST_DOUBLE of MODE (SFmode or DFmode only -- asserted
   below) from the raw target bits in V.  Inverse of
   const_double_to_hwint.
   NOTE(review): the return-type line, braces, the declarations of
   `rv' and `tv', and the SFmode branch's guard (original lines
   ~1059-1060) are missing from this elided chunk.  */
1054 hwint_to_const_double (enum machine_mode mode
, HOST_WIDE_INT v
)
1058 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
/* (v << 32) >> 32 keeps the low 32 bits of V, sign-propagated into
   the upper half, as required by real_from_target's word format.  */
1061 tv
[0] = (v
<< 32) >> 32;
1062 else if (mode
== DFmode
)
/* NOTE(review): the assignment of the high word tv[0] for DFmode
   (original line 1063) is missing from this chunk.  */
1064 tv
[1] = (v
<< 32) >> 32;
1067 real_from_target (&rv
, tv
, mode
);
1068 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
/* Output assembly syntax for address ADDR to FILE: "0(rN)" for a
   plain register, "rA,rB" for reg+reg, "d(rN)" for reg+const, and a
   plain constant otherwise.
   NOTE(review): the return-type line, braces, the switch case labels
   and the declarations of `reg' and `offset' are missing from this
   elided chunk.  */
1072 print_operand_address (FILE * file
, register rtx addr
)
/* An AND with -16 only enforces 16-byte alignment; print the
   underlying address instead.  */
1077 if (GET_CODE (addr
) == AND
1078 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
1079 && INTVAL (XEXP (addr
, 1)) == -16)
1080 addr
= XEXP (addr
, 0);
1082 switch (GET_CODE (addr
))
/* Plain register: zero displacement.  */
1085 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
/* PLUS: split into base and offset.  */
1089 reg
= XEXP (addr
, 0);
1090 offset
= XEXP (addr
, 1);
1091 if (GET_CODE (offset
) == REG
)
1093 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
1094 reg_names
[REGNO (offset
)]);
1096 else if (GET_CODE (offset
) == CONST_INT
)
1098 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
1099 INTVAL (offset
), reg_names
[REGNO (reg
)]);
/* Fallback: print ADDR as a constant address expression.  */
1109 output_addr_const (file
, addr
);
1119 print_operand (FILE * file
, rtx x
, int code
)
1121 enum machine_mode mode
= GET_MODE (x
);
1123 unsigned char arr
[16];
1124 int xcode
= GET_CODE (x
);
1126 if (GET_MODE (x
) == VOIDmode
)
1129 case 'L': /* 128 bits, signed */
1130 case 'm': /* 128 bits, signed */
1131 case 'T': /* 128 bits, signed */
1132 case 't': /* 128 bits, signed */
1135 case 'K': /* 64 bits, signed */
1136 case 'k': /* 64 bits, signed */
1137 case 'D': /* 64 bits, signed */
1138 case 'd': /* 64 bits, signed */
1141 case 'J': /* 32 bits, signed */
1142 case 'j': /* 32 bits, signed */
1143 case 's': /* 32 bits, signed */
1144 case 'S': /* 32 bits, signed */
1151 case 'j': /* 32 bits, signed */
1152 case 'k': /* 64 bits, signed */
1153 case 'm': /* 128 bits, signed */
1154 if (xcode
== CONST_INT
1155 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1157 gcc_assert (logical_immediate_p (x
, mode
));
1158 constant_to_array (mode
, x
, arr
);
1159 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1160 val
= trunc_int_for_mode (val
, SImode
);
1161 switch (which_logical_immediate (val
))
1166 fprintf (file
, "h");
1169 fprintf (file
, "b");
1179 case 'J': /* 32 bits, signed */
1180 case 'K': /* 64 bits, signed */
1181 case 'L': /* 128 bits, signed */
1182 if (xcode
== CONST_INT
1183 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1185 gcc_assert (logical_immediate_p (x
, mode
)
1186 || iohl_immediate_p (x
, mode
));
1187 constant_to_array (mode
, x
, arr
);
1188 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1189 val
= trunc_int_for_mode (val
, SImode
);
1190 switch (which_logical_immediate (val
))
1196 val
= trunc_int_for_mode (val
, HImode
);
1199 val
= trunc_int_for_mode (val
, QImode
);
1204 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1210 case 't': /* 128 bits, signed */
1211 case 'd': /* 64 bits, signed */
1212 case 's': /* 32 bits, signed */
1215 enum immediate_class c
= classify_immediate (x
, mode
);
1219 constant_to_array (mode
, x
, arr
);
1220 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1221 val
= trunc_int_for_mode (val
, SImode
);
1222 switch (which_immediate_load (val
))
1227 fprintf (file
, "a");
1230 fprintf (file
, "h");
1233 fprintf (file
, "hu");
1240 constant_to_array (mode
, x
, arr
);
1241 cpat_info (arr
, GET_MODE_SIZE (mode
), &info
, 0);
1243 fprintf (file
, "b");
1245 fprintf (file
, "h");
1247 fprintf (file
, "w");
1249 fprintf (file
, "d");
1252 if (xcode
== CONST_VECTOR
)
1254 x
= CONST_VECTOR_ELT (x
, 0);
1255 xcode
= GET_CODE (x
);
1257 if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1258 fprintf (file
, "a");
1259 else if (xcode
== HIGH
)
1260 fprintf (file
, "hu");
1274 case 'T': /* 128 bits, signed */
1275 case 'D': /* 64 bits, signed */
1276 case 'S': /* 32 bits, signed */
1279 enum immediate_class c
= classify_immediate (x
, mode
);
1283 constant_to_array (mode
, x
, arr
);
1284 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1285 val
= trunc_int_for_mode (val
, SImode
);
1286 switch (which_immediate_load (val
))
1293 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1298 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1301 constant_to_array (mode
, x
, arr
);
1303 for (i
= 0; i
< 16; i
++)
1308 print_operand (file
, GEN_INT (val
), 0);
1311 constant_to_array (mode
, x
, arr
);
1312 cpat_info (arr
, GET_MODE_SIZE (mode
), 0, &info
);
1313 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, (HOST_WIDE_INT
)info
);
1318 if (GET_CODE (x
) == CONST_VECTOR
)
1319 x
= CONST_VECTOR_ELT (x
, 0);
1320 output_addr_const (file
, x
);
1322 fprintf (file
, "@h");
1336 if (xcode
== CONST_INT
)
1338 /* Only 4 least significant bits are relevant for generate
1339 control word instructions. */
1340 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1345 case 'M': /* print code for c*d */
1346 if (GET_CODE (x
) == CONST_INT
)
1350 fprintf (file
, "b");
1353 fprintf (file
, "h");
1356 fprintf (file
, "w");
1359 fprintf (file
, "d");
1368 case 'N': /* Negate the operand */
1369 if (xcode
== CONST_INT
)
1370 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1371 else if (xcode
== CONST_VECTOR
)
1372 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1373 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1376 case 'I': /* enable/disable interrupts */
1377 if (xcode
== CONST_INT
)
1378 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1381 case 'b': /* branch modifiers */
1383 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1384 else if (COMPARISON_P (x
))
1385 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1388 case 'i': /* indirect call */
1391 if (GET_CODE (XEXP (x
, 0)) == REG
)
1392 /* Used in indirect function calls. */
1393 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1395 output_address (XEXP (x
, 0));
1399 case 'p': /* load/store */
1403 xcode
= GET_CODE (x
);
1408 xcode
= GET_CODE (x
);
1411 fprintf (file
, "d");
1412 else if (xcode
== CONST_INT
)
1413 fprintf (file
, "a");
1414 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1415 fprintf (file
, "r");
1416 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1418 if (GET_CODE (XEXP (x
, 1)) == REG
)
1419 fprintf (file
, "x");
1421 fprintf (file
, "d");
1426 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1428 output_addr_const (file
, GEN_INT (val
));
1432 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1434 output_addr_const (file
, GEN_INT (val
));
1438 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1440 output_addr_const (file
, GEN_INT (val
));
1444 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1445 val
= (val
>> 3) & 0x1f;
1446 output_addr_const (file
, GEN_INT (val
));
1450 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1453 output_addr_const (file
, GEN_INT (val
));
1457 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1460 output_addr_const (file
, GEN_INT (val
));
1464 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1467 output_addr_const (file
, GEN_INT (val
));
1471 val
= xcode
== CONST_INT
? INTVAL (x
) : INTVAL (CONST_VECTOR_ELT (x
, 0));
1472 val
= -(val
& -8ll);
1473 val
= (val
>> 3) & 0x1f;
1474 output_addr_const (file
, GEN_INT (val
));
1479 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1480 else if (xcode
== MEM
)
1481 output_address (XEXP (x
, 0));
1482 else if (xcode
== CONST_VECTOR
)
1483 print_operand (file
, CONST_VECTOR_ELT (x
, 0), 0);
1485 output_addr_const (file
, x
);
1492 output_operand_lossage ("invalid %%xn code");
1497 extern char call_used_regs
[];
1499 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1500 caller saved register. For leaf functions it is more efficient to
1501 use a volatile register because we won't need to save and restore the
1502 pic register. This routine is only valid after register allocation
1503 is completed, so we can pick an unused register. */
1507 rtx pic_reg
= pic_offset_table_rtx
;
1508 if (!reload_completed
&& !reload_in_progress
)
1513 /* Split constant addresses to handle cases that are too large.
1514 Add in the pic register when in PIC mode.
1515 Split immediates that require more than 1 instruction. */
1517 spu_split_immediate (rtx
* ops
)
1519 enum machine_mode mode
= GET_MODE (ops
[0]);
1520 enum immediate_class c
= classify_immediate (ops
[1], mode
);
1526 unsigned char arrhi
[16];
1527 unsigned char arrlo
[16];
1530 constant_to_array (mode
, ops
[1], arrhi
);
1531 to
= !can_create_pseudo_p () ? ops
[0] : gen_reg_rtx (mode
);
1532 for (i
= 0; i
< 16; i
+= 4)
1534 arrlo
[i
+ 2] = arrhi
[i
+ 2];
1535 arrlo
[i
+ 3] = arrhi
[i
+ 3];
1536 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
1537 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
1539 hi
= array_to_constant (mode
, arrhi
);
1540 lo
= array_to_constant (mode
, arrlo
);
1541 emit_move_insn (to
, hi
);
1542 emit_insn (gen_rtx_SET
1543 (VOIDmode
, ops
[0], gen_rtx_IOR (mode
, to
, lo
)));
1548 unsigned char arr_fsmbi
[16];
1549 unsigned char arr_andbi
[16];
1550 rtx to
, reg_fsmbi
, reg_and
;
1552 enum machine_mode imode
= mode
;
1553 /* We need to do reals as ints because the constant used in the
1554 * AND might not be a legitimate real constant. */
1555 imode
= int_mode_for_mode (mode
);
1556 constant_to_array (mode
, ops
[1], arr_fsmbi
);
1558 to
= simplify_gen_subreg(imode
, ops
[0], GET_MODE (ops
[0]), 0);
1561 for (i
= 0; i
< 16; i
++)
1562 if (arr_fsmbi
[i
] != 0)
1564 arr_andbi
[0] = arr_fsmbi
[i
];
1565 arr_fsmbi
[i
] = 0xff;
1567 for (i
= 1; i
< 16; i
++)
1568 arr_andbi
[i
] = arr_andbi
[0];
1569 reg_fsmbi
= array_to_constant (imode
, arr_fsmbi
);
1570 reg_and
= array_to_constant (imode
, arr_andbi
);
1571 emit_move_insn (to
, reg_fsmbi
);
1572 emit_insn (gen_rtx_SET
1573 (VOIDmode
, to
, gen_rtx_AND (imode
, to
, reg_and
)));
1577 if (reload_in_progress
|| reload_completed
)
1579 rtx mem
= force_const_mem (mode
, ops
[1]);
1580 if (TARGET_LARGE_MEM
)
1582 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
1583 emit_move_insn (addr
, XEXP (mem
, 0));
1584 mem
= replace_equiv_address (mem
, addr
);
1586 emit_move_insn (ops
[0], mem
);
1592 if (reload_completed
&& GET_CODE (ops
[1]) != HIGH
)
1596 emit_move_insn (ops
[0], gen_rtx_HIGH (mode
, ops
[1]));
1597 emit_move_insn (ops
[0], gen_rtx_LO_SUM (mode
, ops
[0], ops
[1]));
1600 emit_insn (gen_pic (ops
[0], ops
[1]));
1603 rtx pic_reg
= get_pic_reg ();
1604 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1605 current_function_uses_pic_offset_table
= 1;
1607 return flag_pic
|| c
== IC_IL2s
;
1618 /* SAVING is TRUE when we are generating the actual load and store
1619 instructions for REGNO. When determining the size of the stack
1620 needed for saving register we must allocate enough space for the
1621 worst case, because we don't always have the information early enough
1622 to not allocate it. But we can at least eliminate the actual loads
1623 and stores during the prologue/epilogue. */
1625 need_to_save_reg (int regno
, int saving
)
1627 if (df_regs_ever_live_p (regno
) && !call_used_regs
[regno
])
1630 && regno
== PIC_OFFSET_TABLE_REGNUM
1631 && (!saving
|| current_function_uses_pic_offset_table
)
1633 || !current_function_is_leaf
|| df_regs_ever_live_p (LAST_ARG_REGNUM
)))
1638 /* This function is only correct starting with local register
1641 spu_saved_regs_size (void)
1643 int reg_save_size
= 0;
1646 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1647 if (need_to_save_reg (regno
, 0))
1648 reg_save_size
+= 0x10;
1649 return reg_save_size
;
1653 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1655 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1657 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1658 return emit_insn (gen_movv4si (mem
, reg
));
1662 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1664 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1666 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1667 return emit_insn (gen_movv4si (reg
, mem
));
1670 /* This happens after reload, so we need to expand it. */
1672 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1675 if (satisfies_constraint_K (GEN_INT (imm
)))
1677 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1681 emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1682 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1683 if (REGNO (src
) == REGNO (scratch
))
1689 /* Return nonzero if this function is known to have a null epilogue. */
1692 direct_return (void)
1694 if (reload_completed
)
1696 if (cfun
->static_chain_decl
== 0
1697 && (spu_saved_regs_size ()
1699 + current_function_outgoing_args_size
1700 + current_function_pretend_args_size
== 0)
1701 && current_function_is_leaf
)
1708 The stack frame looks like this:
1715 prev SP | back chain |
1718 | reg save | current_function_pretend_args_size bytes
1721 | saved regs | spu_saved_regs_size() bytes
1724 FP | vars | get_frame_size() bytes
1728 | args | current_function_outgoing_args_size bytes
1738 spu_expand_prologue (void)
1740 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1741 HOST_WIDE_INT total_size
;
1742 HOST_WIDE_INT saved_regs_size
;
1743 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1744 rtx scratch_reg_0
, scratch_reg_1
;
1747 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1748 the "toplevel" insn chain. */
1749 emit_note (NOTE_INSN_DELETED
);
1751 if (flag_pic
&& optimize
== 0)
1752 current_function_uses_pic_offset_table
= 1;
1754 if (spu_naked_function_p (current_function_decl
))
1757 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1758 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1760 saved_regs_size
= spu_saved_regs_size ();
1761 total_size
= size
+ saved_regs_size
1762 + current_function_outgoing_args_size
1763 + current_function_pretend_args_size
;
1765 if (!current_function_is_leaf
1766 || current_function_calls_alloca
|| total_size
> 0)
1767 total_size
+= STACK_POINTER_OFFSET
;
1769 /* Save this first because code after this might use the link
1770 register as a scratch register. */
1771 if (!current_function_is_leaf
)
1773 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1774 RTX_FRAME_RELATED_P (insn
) = 1;
1779 offset
= -current_function_pretend_args_size
;
1780 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1781 if (need_to_save_reg (regno
, 1))
1784 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1785 RTX_FRAME_RELATED_P (insn
) = 1;
1789 if (flag_pic
&& current_function_uses_pic_offset_table
)
1791 rtx pic_reg
= get_pic_reg ();
1792 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1793 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1798 if (flag_stack_check
)
1800 /* We compare against total_size-1 because
1801 ($sp >= total_size) <=> ($sp > total_size-1) */
1802 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1803 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1804 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1805 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1807 emit_move_insn (scratch_v4si
, size_v4si
);
1808 size_v4si
= scratch_v4si
;
1810 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1811 emit_insn (gen_vec_extractv4si
1812 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1813 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1816 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1817 the value of the previous $sp because we save it as the back
1819 if (total_size
<= 2000)
1821 /* In this case we save the back chain first. */
1822 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1824 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1826 else if (satisfies_constraint_K (GEN_INT (-total_size
)))
1828 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1830 emit_insn (gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
)));
1834 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1836 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1838 RTX_FRAME_RELATED_P (insn
) = 1;
1839 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1841 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, real
, REG_NOTES (insn
));
1843 if (total_size
> 2000)
1845 /* Save the back chain ptr */
1846 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1849 if (frame_pointer_needed
)
1851 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1852 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1853 + current_function_outgoing_args_size
;
1854 /* Set the new frame_pointer */
1855 insn
= frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1856 RTX_FRAME_RELATED_P (insn
) = 1;
1857 real
= gen_addsi3 (fp_reg
, sp_reg
, GEN_INT (fp_offset
));
1859 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
,
1860 real
, REG_NOTES (insn
));
1861 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = STACK_BOUNDARY
;
1865 emit_note (NOTE_INSN_DELETED
);
1869 spu_expand_epilogue (bool sibcall_p
)
1871 int size
= get_frame_size (), offset
, regno
;
1872 HOST_WIDE_INT saved_regs_size
, total_size
;
1873 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1874 rtx jump
, scratch_reg_0
;
1876 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1877 the "toplevel" insn chain. */
1878 emit_note (NOTE_INSN_DELETED
);
1880 if (spu_naked_function_p (current_function_decl
))
1883 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1885 saved_regs_size
= spu_saved_regs_size ();
1886 total_size
= size
+ saved_regs_size
1887 + current_function_outgoing_args_size
1888 + current_function_pretend_args_size
;
1890 if (!current_function_is_leaf
1891 || current_function_calls_alloca
|| total_size
> 0)
1892 total_size
+= STACK_POINTER_OFFSET
;
1896 if (current_function_calls_alloca
)
1897 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1899 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1902 if (saved_regs_size
> 0)
1904 offset
= -current_function_pretend_args_size
;
1905 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1906 if (need_to_save_reg (regno
, 1))
1909 frame_emit_load (regno
, sp_reg
, offset
);
1914 if (!current_function_is_leaf
)
1915 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1919 emit_insn (gen_rtx_USE
1920 (VOIDmode
, gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
)));
1921 jump
= emit_jump_insn (gen__return ());
1922 emit_barrier_after (jump
);
1925 emit_note (NOTE_INSN_DELETED
);
1929 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1933 /* This is inefficient because it ends up copying to a save-register
1934 which then gets saved even though $lr has already been saved. But
1935 it does generate better code for leaf functions and we don't need
1936 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1937 used for __builtin_return_address anyway, so maybe we don't care if
1938 it's inefficient. */
1939 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
1943 /* Given VAL, generate a constant appropriate for MODE.
1944 If MODE is a vector mode, every element will be VAL.
1945 For TImode, VAL will be zero extended to 128 bits. */
1947 spu_const (enum machine_mode mode
, HOST_WIDE_INT val
)
1953 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
1954 || GET_MODE_CLASS (mode
) == MODE_FLOAT
1955 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1956 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1958 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1959 return immed_double_const (val
, 0, mode
);
1961 /* val is the bit representation of the float */
1962 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1963 return hwint_to_const_double (mode
, val
);
1965 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
1966 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
1968 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
1970 units
= GET_MODE_NUNITS (mode
);
1972 v
= rtvec_alloc (units
);
1974 for (i
= 0; i
< units
; ++i
)
1975 RTVEC_ELT (v
, i
) = inner
;
1977 return gen_rtx_CONST_VECTOR (mode
, v
);
1980 /* branch hint stuff */
1982 /* The hardware requires 8 insns between a hint and the branch it
1983 effects. This variable describes how many rtl instructions the
1984 compiler needs to see before inserting a hint. (FIXME: We should
1985 accept less and insert nops to enforce it because hinting is always
1986 profitable for performance, but we do need to be careful of code
1988 int spu_hint_dist
= (8 * 4);
1990 /* Create a MODE vector constant from 4 ints. */
1992 spu_const_from_ints(enum machine_mode mode
, int a
, int b
, int c
, int d
)
1994 unsigned char arr
[16];
1995 arr
[0] = (a
>> 24) & 0xff;
1996 arr
[1] = (a
>> 16) & 0xff;
1997 arr
[2] = (a
>> 8) & 0xff;
1998 arr
[3] = (a
>> 0) & 0xff;
1999 arr
[4] = (b
>> 24) & 0xff;
2000 arr
[5] = (b
>> 16) & 0xff;
2001 arr
[6] = (b
>> 8) & 0xff;
2002 arr
[7] = (b
>> 0) & 0xff;
2003 arr
[8] = (c
>> 24) & 0xff;
2004 arr
[9] = (c
>> 16) & 0xff;
2005 arr
[10] = (c
>> 8) & 0xff;
2006 arr
[11] = (c
>> 0) & 0xff;
2007 arr
[12] = (d
>> 24) & 0xff;
2008 arr
[13] = (d
>> 16) & 0xff;
2009 arr
[14] = (d
>> 8) & 0xff;
2010 arr
[15] = (d
>> 0) & 0xff;
2011 return array_to_constant(mode
, arr
);
2014 /* An array of these is used to propagate hints to predecessor blocks. */
2017 rtx prop_jump
; /* propagated from another block */
2018 basic_block bb
; /* the original block. */
2021 /* The special $hbr register is used to prevent the insn scheduler from
2022 moving hbr insns across instructions which invalidate them. It
2023 should only be used in a clobber, and this function searches for
2024 insns which clobber it. */
2026 insn_clobbers_hbr (rtx insn
)
2028 if (INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PARALLEL
)
2030 rtx parallel
= PATTERN (insn
);
2033 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
2035 clobber
= XVECEXP (parallel
, 0, j
);
2036 if (GET_CODE (clobber
) == CLOBBER
2037 && GET_CODE (XEXP (clobber
, 0)) == REG
2038 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
2046 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
, int distance
)
2049 rtx hint
, insn
, prev
, next
;
2051 if (before
== 0 || branch
== 0 || target
== 0)
2058 branch_label
= gen_label_rtx ();
2059 LABEL_NUSES (branch_label
)++;
2060 LABEL_PRESERVE_P (branch_label
) = 1;
2061 insn
= emit_label_before (branch_label
, branch
);
2062 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
2064 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2065 the current insn is pipe0, dual issue with it. */
2066 prev
= prev_active_insn (before
);
2067 if (prev
&& get_pipe (prev
) == 0)
2068 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2069 else if (get_pipe (before
) == 0 && distance
> spu_hint_dist
)
2071 next
= next_active_insn (before
);
2072 hint
= emit_insn_after (gen_hbr (branch_label
, target
), before
);
2074 PUT_MODE (next
, TImode
);
2078 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
2079 PUT_MODE (hint
, TImode
);
2081 recog_memoized (hint
);
2084 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2085 the rtx for the branch target. */
2087 get_branch_target (rtx branch
)
2089 if (GET_CODE (branch
) == JUMP_INSN
)
2093 /* Return statements */
2094 if (GET_CODE (PATTERN (branch
)) == RETURN
)
2095 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2098 if (GET_CODE (PATTERN (branch
)) == ADDR_VEC
2099 || GET_CODE (PATTERN (branch
)) == ADDR_DIFF_VEC
)
2102 set
= single_set (branch
);
2103 src
= SET_SRC (set
);
2104 if (GET_CODE (SET_DEST (set
)) != PC
)
2107 if (GET_CODE (src
) == IF_THEN_ELSE
)
2110 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
2113 /* If the more probable case is not a fall through, then
2114 try a branch hint. */
2115 HOST_WIDE_INT prob
= INTVAL (XEXP (note
, 0));
2116 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
2117 && GET_CODE (XEXP (src
, 1)) != PC
)
2118 lab
= XEXP (src
, 1);
2119 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
2120 && GET_CODE (XEXP (src
, 2)) != PC
)
2121 lab
= XEXP (src
, 2);
2125 if (GET_CODE (lab
) == RETURN
)
2126 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
2134 else if (GET_CODE (branch
) == CALL_INSN
)
2137 /* All of our call patterns are in a PARALLEL and the CALL is
2138 the first pattern in the PARALLEL. */
2139 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
2141 call
= XVECEXP (PATTERN (branch
), 0, 0);
2142 if (GET_CODE (call
) == SET
)
2143 call
= SET_SRC (call
);
2144 if (GET_CODE (call
) != CALL
)
2146 return XEXP (XEXP (call
, 0), 0);
2152 insert_branch_hints (void)
2154 struct spu_bb_info
*spu_bb_info
;
2155 rtx branch
, insn
, next
;
2156 rtx branch_target
= 0;
2157 int branch_addr
= 0, insn_addr
, head_addr
;
2162 (struct spu_bb_info
*) xcalloc (last_basic_block
+ 1,
2163 sizeof (struct spu_bb_info
));
2165 /* We need exact insn addresses and lengths. */
2166 shorten_branches (get_insns ());
2168 FOR_EACH_BB_REVERSE (bb
)
2170 head_addr
= INSN_ADDRESSES (INSN_UID (BB_HEAD (bb
)));
2172 if (spu_bb_info
[bb
->index
].prop_jump
)
2174 branch
= spu_bb_info
[bb
->index
].prop_jump
;
2175 branch_target
= get_branch_target (branch
);
2176 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
2178 /* Search from end of a block to beginning. In this loop, find
2179 jumps which need a branch and emit them only when:
2180 - it's an indirect branch and we're at the insn which sets
2182 - we're at an insn that will invalidate the hint. e.g., a
2183 call, another hint insn, inline asm that clobbers $hbr, and
2184 some inlined operations (divmodsi4). Don't consider jumps
2185 because they are only at the end of a block and are
2186 considered when we are deciding whether to propagate
2187 - we're getting too far away from the branch. The hbr insns
2188 only have a signed 10-bit offset
2189 We go back as far as possible so the branch will be considered
2190 for propagation when we get to the beginning of the block. */
2192 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
2196 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
2198 && ((GET_CODE (branch_target
) == REG
2199 && set_of (branch_target
, insn
) != NULL_RTX
)
2200 || insn_clobbers_hbr (insn
)
2201 || branch_addr
- insn_addr
> 600))
2203 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2204 if (insn
!= BB_END (bb
)
2205 && branch_addr
- next_addr
>= spu_hint_dist
)
2209 "hint for %i in block %i before %i\n",
2210 INSN_UID (branch
), bb
->index
, INSN_UID (next
));
2211 spu_emit_branch_hint (next
, branch
, branch_target
,
2212 branch_addr
- next_addr
);
2217 /* JUMP_P will only be true at the end of a block. When
2218 branch is already set it means we've previously decided
2219 to propagate a hint for that branch into this block. */
2220 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
2223 if ((branch_target
= get_branch_target (insn
)))
2226 branch_addr
= insn_addr
;
2230 /* When a branch hint is emitted it will be inserted
2231 before "next". Make sure next is the beginning of a
2232 cycle to minimize impact on the scheduled insns. */
2233 if (GET_MODE (insn
) == TImode
)
2236 if (insn
== BB_HEAD (bb
))
2242 /* If we haven't emitted a hint for this branch yet, it might
2243 be profitable to emit it in one of the predecessor blocks,
2244 especially for loops. */
2246 basic_block prev
= 0, prop
= 0, prev2
= 0;
2247 int loop_exit
= 0, simple_loop
= 0;
2250 next_addr
= INSN_ADDRESSES (INSN_UID (next
));
2252 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
2253 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
2254 prev
= EDGE_PRED (bb
, j
)->src
;
2256 prev2
= EDGE_PRED (bb
, j
)->src
;
2258 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
2259 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
2261 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
2264 /* If this branch is a loop exit then propagate to previous
2265 fallthru block. This catches the cases when it is a simple
2266 loop or when there is an initial branch into the loop. */
2267 if (prev
&& loop_exit
&& prev
->loop_depth
<= bb
->loop_depth
)
2270 /* If there is only one adjacent predecessor. Don't propagate
2271 outside this loop. This loop_depth test isn't perfect, but
2272 I'm not sure the loop_father member is valid at this point. */
2273 else if (prev
&& single_pred_p (bb
)
2274 && prev
->loop_depth
== bb
->loop_depth
)
2277 /* If this is the JOIN block of a simple IF-THEN then
2278 propagate the hint to the HEADER block. */
2279 else if (prev
&& prev2
2280 && EDGE_COUNT (bb
->preds
) == 2
2281 && EDGE_COUNT (prev
->preds
) == 1
2282 && EDGE_PRED (prev
, 0)->src
== prev2
2283 && prev2
->loop_depth
== bb
->loop_depth
2284 && GET_CODE (branch_target
) != REG
)
2287 /* Don't propagate when:
2288 - this is a simple loop and the hint would be too far
2289 - this is not a simple loop and there are 16 insns in
2291 - the predecessor block ends in a branch that will be
2293 - the predecessor block ends in an insn that invalidates
2297 && (bbend
= BB_END (prop
))
2298 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
2299 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
2300 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
2303 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
2304 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2305 bb
->index
, prop
->index
, bb
->loop_depth
,
2306 INSN_UID (branch
), loop_exit
, simple_loop
,
2307 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
2309 spu_bb_info
[prop
->index
].prop_jump
= branch
;
2310 spu_bb_info
[prop
->index
].bb
= bb
;
2312 else if (next
&& branch_addr
- next_addr
>= spu_hint_dist
)
2315 fprintf (dump_file
, "hint for %i in block %i before %i\n",
2316 INSN_UID (branch
), bb
->index
, INSN_UID (next
));
2317 spu_emit_branch_hint (next
, branch
, branch_target
,
2318 branch_addr
- next_addr
);
2326 /* Emit a nop for INSN such that the two will dual issue. This assumes
2327 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2328 We check for TImode to handle a MULTI1 insn which has dual issued its
2329 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2332 emit_nop_for_insn (rtx insn
)
2336 p
= get_pipe (insn
);
2337 if (p
== 1 && GET_MODE (insn
) == TImode
)
2339 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2340 PUT_MODE (new_insn
, TImode
);
2341 PUT_MODE (insn
, VOIDmode
);
2344 new_insn
= emit_insn_after (gen_lnop (), insn
);
2347 /* Insert nops in basic blocks to meet dual issue alignment
2352 rtx insn
, next_insn
, prev_insn
;
2356 /* This sets up INSN_ADDRESSES. */
2357 shorten_branches (get_insns ());
2359 /* Keep track of length added by nops. */
2363 for (insn
= get_insns (); insn
; insn
= next_insn
)
2365 next_insn
= next_active_insn (insn
);
2366 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2367 if (GET_MODE (insn
) == TImode
2369 && GET_MODE (next_insn
) != TImode
2370 && ((addr
+ length
) & 7) != 0)
2372 /* prev_insn will always be set because the first insn is
2373 always 8-byte aligned. */
2374 emit_nop_for_insn (prev_insn
);
2382 spu_machine_dependent_reorg (void)
2386 if (TARGET_BRANCH_HINTS
)
2387 insert_branch_hints ();
2393 /* Insn scheduling routines, primarily for dual issue. */
2395 spu_sched_issue_rate (void)
2401 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED
,
2402 int verbose ATTRIBUTE_UNUSED
, rtx insn
,
2405 if (GET_CODE (PATTERN (insn
)) != USE
2406 && GET_CODE (PATTERN (insn
)) != CLOBBER
2407 && get_pipe (insn
) != -2)
2409 return can_issue_more
;
2416 /* Handle inline asm */
2417 if (INSN_CODE (insn
) == -1)
2419 t
= get_attr_type (insn
);
2435 case TYPE_IPREFETCH
:
2452 spu_sched_adjust_priority (rtx insn
, int pri
)
2454 int p
= get_pipe (insn
);
2455 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2457 if (GET_CODE (PATTERN (insn
)) == USE
2458 || GET_CODE (PATTERN (insn
)) == CLOBBER
2461 /* Schedule pipe0 insns early for greedier dual issue. */
2467 /* INSN is dependent on DEP_INSN. */
2469 spu_sched_adjust_cost (rtx insn
, rtx link ATTRIBUTE_UNUSED
,
2470 rtx dep_insn ATTRIBUTE_UNUSED
, int cost
)
2472 if (GET_CODE (insn
) == CALL_INSN
)
2474 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2475 scheduler makes every insn in a block anti-dependent on the final
2476 jump_insn. We adjust here so higher cost insns will get scheduled
2478 if (GET_CODE (insn
) == JUMP_INSN
&& REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
2479 return insn_cost (dep_insn
) - 3;
2483 /* Create a CONST_DOUBLE from a string. */
2485 spu_float_const (const char *string
, enum machine_mode mode
)
2487 REAL_VALUE_TYPE value
;
2488 value
= REAL_VALUE_ATOF (string
, mode
);
2489 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
2492 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2493 CONST_INT fits constraint 'K', i.e., is small. */
2495 legitimate_const (rtx x
, int aligned
)
2497 /* We can never know if the resulting address fits in 18 bits and can be
2498 loaded with ila. Instead we should use the HI and LO relocations to
2499 load a 32-bit address. */
2502 gcc_assert (GET_CODE (x
) == CONST
);
2504 if (GET_CODE (XEXP (x
, 0)) != PLUS
)
2506 sym
= XEXP (XEXP (x
, 0), 0);
2507 cst
= XEXP (XEXP (x
, 0), 1);
2508 if (GET_CODE (sym
) != SYMBOL_REF
|| GET_CODE (cst
) != CONST_INT
)
2510 if (aligned
&& ((INTVAL (cst
) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym
)))
2512 return satisfies_constraint_K (cst
);
2516 spu_constant_address_p (rtx x
)
2518 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
2519 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
2520 || GET_CODE (x
) == HIGH
);
2523 static enum spu_immediate
2524 which_immediate_load (HOST_WIDE_INT val
)
2526 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
2528 if (val
>= -0x8000 && val
<= 0x7fff)
2530 if (val
>= 0 && val
<= 0x3ffff)
2532 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
2534 if ((val
& 0xffff) == 0)
2540 /* Return true when OP can be loaded by one of the il instructions, or
2541 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
2543 immediate_load_p (rtx op
, enum machine_mode mode
)
2545 if (CONSTANT_P (op
))
2547 enum immediate_class c
= classify_immediate (op
, mode
);
2548 return c
== IC_IL1
|| c
== IC_IL1s
2549 || (!epilogue_completed
&& (c
== IC_IL2
|| c
== IC_IL2s
));
2554 /* Return true if the first SIZE bytes of arr is a constant that can be
2555 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2556 represent the size and offset of the instruction to use. */
2558 cpat_info(unsigned char *arr
, int size
, int *prun
, int *pstart
)
2560 int cpat
, run
, i
, start
;
2564 for (i
= 0; i
< size
&& cpat
; i
++)
2572 else if (arr
[i
] == 2 && arr
[i
+1] == 3)
2574 else if (arr
[i
] == 0)
2576 while (arr
[i
+run
] == run
&& i
+run
< 16)
2578 if (run
!= 4 && run
!= 8)
2583 if ((i
& (run
-1)) != 0)
2590 if (cpat
&& (run
|| size
< 16))
2597 *pstart
= start
== -1 ? 16-run
: start
;
2603 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2604 it into a register. MODE is only valid when OP is a CONST_INT. */
2605 static enum immediate_class
2606 classify_immediate (rtx op
, enum machine_mode mode
)
2609 unsigned char arr
[16];
2610 int i
, j
, repeated
, fsmbi
, repeat
;
2612 gcc_assert (CONSTANT_P (op
));
2614 if (GET_MODE (op
) != VOIDmode
)
2615 mode
= GET_MODE (op
);
2617 /* A V4SI const_vector with all identical symbols is ok. */
2620 && GET_CODE (op
) == CONST_VECTOR
2621 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_INT
2622 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) != CONST_DOUBLE
2623 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
2624 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
2625 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3))
2626 op
= CONST_VECTOR_ELT (op
, 0);
2628 switch (GET_CODE (op
))
2632 return TARGET_LARGE_MEM
? IC_IL2s
: IC_IL1s
;
2635 return TARGET_LARGE_MEM
2636 || !legitimate_const (op
, 0) ? IC_IL2s
: IC_IL1s
;
2642 for (i
= 0; i
< GET_MODE_NUNITS (mode
); i
++)
2643 if (GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_INT
2644 && GET_CODE (CONST_VECTOR_ELT (op
, i
)) != CONST_DOUBLE
)
2650 constant_to_array (mode
, op
, arr
);
2652 /* Check that each 4-byte slot is identical. */
2654 for (i
= 4; i
< 16; i
+= 4)
2655 for (j
= 0; j
< 4; j
++)
2656 if (arr
[j
] != arr
[i
+ j
])
2661 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2662 val
= trunc_int_for_mode (val
, SImode
);
2664 if (which_immediate_load (val
) != SPU_NONE
)
2668 /* Any mode of 2 bytes or smaller can be loaded with an il
2670 gcc_assert (GET_MODE_SIZE (mode
) > 2);
2674 for (i
= 0; i
< 16 && fsmbi
; i
++)
2675 if (arr
[i
] != 0 && repeat
== 0)
2677 else if (arr
[i
] != 0 && arr
[i
] != repeat
)
2680 return repeat
== 0xff ? IC_FSMBI
: IC_FSMBI2
;
2682 if (cpat_info (arr
, GET_MODE_SIZE (mode
), 0, 0))
2695 static enum spu_immediate
2696 which_logical_immediate (HOST_WIDE_INT val
)
2698 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
2700 if (val
>= -0x200 && val
<= 0x1ff)
2702 if (val
>= 0 && val
<= 0xffff)
2704 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
2706 val
= trunc_int_for_mode (val
, HImode
);
2707 if (val
>= -0x200 && val
<= 0x1ff)
2709 if ((val
& 0xff) == ((val
>> 8) & 0xff))
2711 val
= trunc_int_for_mode (val
, QImode
);
2712 if (val
>= -0x200 && val
<= 0x1ff)
2719 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2722 const_vector_immediate_p (rtx x
)
2725 gcc_assert (GET_CODE (x
) == CONST_VECTOR
);
2726 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
2727 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
2728 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
2734 logical_immediate_p (rtx op
, enum machine_mode mode
)
2737 unsigned char arr
[16];
2740 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2741 || GET_CODE (op
) == CONST_VECTOR
);
2743 if (GET_CODE (op
) == CONST_VECTOR
2744 && !const_vector_immediate_p (op
))
2747 if (GET_MODE (op
) != VOIDmode
)
2748 mode
= GET_MODE (op
);
2750 constant_to_array (mode
, op
, arr
);
2752 /* Check that bytes are repeated. */
2753 for (i
= 4; i
< 16; i
+= 4)
2754 for (j
= 0; j
< 4; j
++)
2755 if (arr
[j
] != arr
[i
+ j
])
2758 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2759 val
= trunc_int_for_mode (val
, SImode
);
2761 i
= which_logical_immediate (val
);
2762 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
2766 iohl_immediate_p (rtx op
, enum machine_mode mode
)
2769 unsigned char arr
[16];
2772 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2773 || GET_CODE (op
) == CONST_VECTOR
);
2775 if (GET_CODE (op
) == CONST_VECTOR
2776 && !const_vector_immediate_p (op
))
2779 if (GET_MODE (op
) != VOIDmode
)
2780 mode
= GET_MODE (op
);
2782 constant_to_array (mode
, op
, arr
);
2784 /* Check that bytes are repeated. */
2785 for (i
= 4; i
< 16; i
+= 4)
2786 for (j
= 0; j
< 4; j
++)
2787 if (arr
[j
] != arr
[i
+ j
])
2790 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2791 val
= trunc_int_for_mode (val
, SImode
);
2793 return val
>= 0 && val
<= 0xffff;
2797 arith_immediate_p (rtx op
, enum machine_mode mode
,
2798 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
2801 unsigned char arr
[16];
2804 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2805 || GET_CODE (op
) == CONST_VECTOR
);
2807 if (GET_CODE (op
) == CONST_VECTOR
2808 && !const_vector_immediate_p (op
))
2811 if (GET_MODE (op
) != VOIDmode
)
2812 mode
= GET_MODE (op
);
2814 constant_to_array (mode
, op
, arr
);
2816 if (VECTOR_MODE_P (mode
))
2817 mode
= GET_MODE_INNER (mode
);
2819 bytes
= GET_MODE_SIZE (mode
);
2820 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
2822 /* Check that bytes are repeated. */
2823 for (i
= bytes
; i
< 16; i
+= bytes
)
2824 for (j
= 0; j
< bytes
; j
++)
2825 if (arr
[j
] != arr
[i
+ j
])
2829 for (j
= 1; j
< bytes
; j
++)
2830 val
= (val
<< 8) | arr
[j
];
2832 val
= trunc_int_for_mode (val
, mode
);
2834 return val
>= low
&& val
<= high
;
2838 - any 32-bit constant (SImode, SFmode)
2839 - any constant that can be generated with fsmbi (any mode)
2840 - a 64-bit constant where the high and low bits are identical
2842 - a 128-bit constant where the four 32-bit words match. */
2844 spu_legitimate_constant_p (rtx x
)
2846 if (GET_CODE (x
) == HIGH
)
2848 /* V4SI with all identical symbols is valid. */
2850 && GET_MODE (x
) == V4SImode
2851 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
2852 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
2853 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
))
2854 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
2855 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
2856 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
2858 if (GET_CODE (x
) == CONST_VECTOR
2859 && !const_vector_immediate_p (x
))
2864 /* Valid address are:
2865 - symbol_ref, label_ref, const
2867 - reg + const, where either reg or const is 16 byte aligned
2868 - reg + reg, alignment doesn't matter
2869 The alignment matters in the reg+const case because lqd and stqd
2870 ignore the 4 least significant bits of the const. (TODO: It might be
2871 preferable to allow any alignment and fix it up when splitting.) */
2873 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED
,
2874 rtx x
, int reg_ok_strict
)
2876 if (mode
== TImode
&& GET_CODE (x
) == AND
2877 && GET_CODE (XEXP (x
, 1)) == CONST_INT
2878 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) -16)
2880 switch (GET_CODE (x
))
2884 return !TARGET_LARGE_MEM
;
2887 return !TARGET_LARGE_MEM
&& legitimate_const (x
, 0);
2890 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
2894 gcc_assert (GET_CODE (x
) == REG
);
2897 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
2902 rtx op0
= XEXP (x
, 0);
2903 rtx op1
= XEXP (x
, 1);
2904 if (GET_CODE (op0
) == SUBREG
)
2905 op0
= XEXP (op0
, 0);
2906 if (GET_CODE (op1
) == SUBREG
)
2907 op1
= XEXP (op1
, 0);
2908 /* We can't just accept any aligned register because CSE can
2909 change it to a register that is not marked aligned and then
2910 recog will fail. So we only accept frame registers because
2911 they will only be changed to other frame registers. */
2912 if (GET_CODE (op0
) == REG
2913 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
2914 && GET_CODE (op1
) == CONST_INT
2915 && INTVAL (op1
) >= -0x2000
2916 && INTVAL (op1
) <= 0x1fff
2917 && (regno_aligned_for_load (REGNO (op0
)) || (INTVAL (op1
) & 15) == 0))
2919 if (GET_CODE (op0
) == REG
2920 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
2921 && GET_CODE (op1
) == REG
2922 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
2933 /* When the address is reg + const_int, force the const_int into a
2936 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
2937 enum machine_mode mode
)
2940 /* Make sure both operands are registers. */
2941 if (GET_CODE (x
) == PLUS
)
2945 if (ALIGNED_SYMBOL_REF_P (op0
))
2947 op0
= force_reg (Pmode
, op0
);
2948 mark_reg_pointer (op0
, 128);
2950 else if (GET_CODE (op0
) != REG
)
2951 op0
= force_reg (Pmode
, op0
);
2952 if (ALIGNED_SYMBOL_REF_P (op1
))
2954 op1
= force_reg (Pmode
, op1
);
2955 mark_reg_pointer (op1
, 128);
2957 else if (GET_CODE (op1
) != REG
)
2958 op1
= force_reg (Pmode
, op1
);
2959 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
2960 if (spu_legitimate_address (mode
, x
, 0))
2966 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2967 struct attribute_spec.handler. */
2969 spu_handle_fndecl_attribute (tree
* node
,
2971 tree args ATTRIBUTE_UNUSED
,
2972 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
2974 if (TREE_CODE (*node
) != FUNCTION_DECL
)
2976 warning (0, "`%s' attribute only applies to functions",
2977 IDENTIFIER_POINTER (name
));
2978 *no_add_attrs
= true;
2984 /* Handle the "vector" attribute. */
2986 spu_handle_vector_attribute (tree
* node
, tree name
,
2987 tree args ATTRIBUTE_UNUSED
,
2988 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
2990 tree type
= *node
, result
= NULL_TREE
;
2991 enum machine_mode mode
;
2994 while (POINTER_TYPE_P (type
)
2995 || TREE_CODE (type
) == FUNCTION_TYPE
2996 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
2997 type
= TREE_TYPE (type
);
2999 mode
= TYPE_MODE (type
);
3001 unsigned_p
= TYPE_UNSIGNED (type
);
3005 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
3008 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
3011 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
3014 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
3017 result
= V4SF_type_node
;
3020 result
= V2DF_type_node
;
3026 /* Propagate qualifiers attached to the element type
3027 onto the vector type. */
3028 if (result
&& result
!= type
&& TYPE_QUALS (type
))
3029 result
= build_qualified_type (result
, TYPE_QUALS (type
));
3031 *no_add_attrs
= true; /* No need to hang on to the attribute. */
3034 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name
));
3036 *node
= reconstruct_complex_type (*node
, result
);
3041 /* Return nonzero if FUNC is a naked function. */
3043 spu_naked_function_p (tree func
)
3047 if (TREE_CODE (func
) != FUNCTION_DECL
)
3050 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
3051 return a
!= NULL_TREE
;
3055 spu_initial_elimination_offset (int from
, int to
)
3057 int saved_regs_size
= spu_saved_regs_size ();
3059 if (!current_function_is_leaf
|| current_function_outgoing_args_size
3060 || get_frame_size () || saved_regs_size
)
3061 sp_offset
= STACK_POINTER_OFFSET
;
3062 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3063 return (sp_offset
+ current_function_outgoing_args_size
);
3064 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3066 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
3067 return sp_offset
+ current_function_outgoing_args_size
3068 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
3069 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
3070 return get_frame_size () + saved_regs_size
+ sp_offset
;
3075 spu_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
)
3077 enum machine_mode mode
= TYPE_MODE (type
);
3078 int byte_size
= ((mode
== BLKmode
)
3079 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3081 /* Make sure small structs are left justified in a register. */
3082 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3083 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
3085 enum machine_mode smode
;
3088 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
3089 int n
= byte_size
/ UNITS_PER_WORD
;
3090 v
= rtvec_alloc (nregs
);
3091 for (i
= 0; i
< n
; i
++)
3093 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
3094 gen_rtx_REG (TImode
,
3097 GEN_INT (UNITS_PER_WORD
* i
));
3098 byte_size
-= UNITS_PER_WORD
;
3106 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3108 gen_rtx_EXPR_LIST (VOIDmode
,
3109 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
3110 GEN_INT (UNITS_PER_WORD
* n
));
3112 return gen_rtx_PARALLEL (mode
, v
);
3114 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
3118 spu_function_arg (CUMULATIVE_ARGS cum
,
3119 enum machine_mode mode
,
3120 tree type
, int named ATTRIBUTE_UNUSED
)
3124 if (cum
>= MAX_REGISTER_ARGS
)
3127 byte_size
= ((mode
== BLKmode
)
3128 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
3130 /* The ABI does not allow parameters to be passed partially in
3131 reg and partially in stack. */
3132 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
3135 /* Make sure small structs are left justified in a register. */
3136 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
3137 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
3139 enum machine_mode smode
;
3143 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
3144 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
3145 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
3147 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
3150 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
3153 /* Variable sized types are passed by reference. */
3155 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
3156 enum machine_mode mode ATTRIBUTE_UNUSED
,
3157 const_tree type
, bool named ATTRIBUTE_UNUSED
)
3159 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
3165 /* Create and return the va_list datatype.
3167 On SPU, va_list is an array type equivalent to
3169 typedef struct __va_list_tag
3171 void *__args __attribute__((__aligned(16)));
3172 void *__skip __attribute__((__aligned(16)));
3176 where __args points to the arg that will be returned by the next
3177 va_arg(), and __skip points to the previous stack frame such that
3178 when __args == __skip we should advance __args by 32 bytes. */
3180 spu_build_builtin_va_list (void)
3182 tree f_args
, f_skip
, record
, type_decl
;
3185 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
3188 build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
3190 f_args
= build_decl (FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
3191 f_skip
= build_decl (FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
3193 DECL_FIELD_CONTEXT (f_args
) = record
;
3194 DECL_ALIGN (f_args
) = 128;
3195 DECL_USER_ALIGN (f_args
) = 1;
3197 DECL_FIELD_CONTEXT (f_skip
) = record
;
3198 DECL_ALIGN (f_skip
) = 128;
3199 DECL_USER_ALIGN (f_skip
) = 1;
3201 TREE_CHAIN (record
) = type_decl
;
3202 TYPE_NAME (record
) = type_decl
;
3203 TYPE_FIELDS (record
) = f_args
;
3204 TREE_CHAIN (f_args
) = f_skip
;
3206 /* We know this is being padded and we want it too. It is an internal
3207 type so hide the warnings from the user. */
3209 warn_padded
= false;
3211 layout_type (record
);
3215 /* The correct type is an array type of one element. */
3216 return build_array_type (record
, build_index_type (size_zero_node
));
3219 /* Implement va_start by filling the va_list structure VALIST.
3220 NEXTARG points to the first anonymous stack argument.
3222 The following global variables are used to initialize
3223 the va_list structure:
3225 current_function_args_info;
3226 the CUMULATIVE_ARGS for this function
3228 current_function_arg_offset_rtx:
3229 holds the offset of the first anonymous stack argument
3230 (relative to the virtual arg pointer). */
3233 spu_va_start (tree valist
, rtx nextarg
)
3235 tree f_args
, f_skip
;
3238 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3239 f_skip
= TREE_CHAIN (f_args
);
3241 valist
= build_va_arg_indirect_ref (valist
);
3243 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3245 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3247 /* Find the __args area. */
3248 t
= make_tree (TREE_TYPE (args
), nextarg
);
3249 if (current_function_pretend_args_size
> 0)
3250 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (args
), t
,
3251 size_int (-STACK_POINTER_OFFSET
));
3252 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (args
), args
, t
);
3253 TREE_SIDE_EFFECTS (t
) = 1;
3254 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3256 /* Find the __skip area. */
3257 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
3258 t
= build2 (POINTER_PLUS_EXPR
, TREE_TYPE (skip
), t
,
3259 size_int (current_function_pretend_args_size
3260 - STACK_POINTER_OFFSET
));
3261 t
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (skip
), skip
, t
);
3262 TREE_SIDE_EFFECTS (t
) = 1;
3263 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
3266 /* Gimplify va_arg by updating the va_list structure
3267 VALIST as required to retrieve an argument of type
3268 TYPE, and returning that argument.
3270 ret = va_arg(VALIST, TYPE);
3272 generates code equivalent to:
3274 paddedsize = (sizeof(TYPE) + 15) & -16;
3275 if (VALIST.__args + paddedsize > VALIST.__skip
3276 && VALIST.__args <= VALIST.__skip)
3277 addr = VALIST.__skip + 32;
3279 addr = VALIST.__args;
3280 VALIST.__args = addr + paddedsize;
3281 ret = *(TYPE *)addr;
3284 spu_gimplify_va_arg_expr (tree valist
, tree type
, tree
* pre_p
,
3285 tree
* post_p ATTRIBUTE_UNUSED
)
3287 tree f_args
, f_skip
;
3289 HOST_WIDE_INT size
, rsize
;
3290 tree paddedsize
, addr
, tmp
;
3291 bool pass_by_reference_p
;
3293 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
3294 f_skip
= TREE_CHAIN (f_args
);
3296 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
3298 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
3300 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
3302 addr
= create_tmp_var (ptr_type_node
, "va_arg");
3303 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
3305 /* if an object is dynamically sized, a pointer to it is passed
3306 instead of the object itself. */
3307 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
3309 if (pass_by_reference_p
)
3310 type
= build_pointer_type (type
);
3311 size
= int_size_in_bytes (type
);
3312 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
3314 /* build conditional expression to calculate addr. The expression
3315 will be gimplified later. */
3316 paddedsize
= size_int (rsize
);
3317 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, args
, paddedsize
);
3318 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
3319 build2 (GT_EXPR
, boolean_type_node
, tmp
, skip
),
3320 build2 (LE_EXPR
, boolean_type_node
, args
, skip
));
3322 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
3323 build2 (POINTER_PLUS_EXPR
, ptr_type_node
, skip
,
3324 size_int (32)), args
);
3326 tmp
= build2 (GIMPLE_MODIFY_STMT
, ptr_type_node
, addr
, tmp
);
3327 gimplify_and_add (tmp
, pre_p
);
3329 /* update VALIST.__args */
3330 tmp
= build2 (POINTER_PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
3331 tmp
= build2 (GIMPLE_MODIFY_STMT
, TREE_TYPE (args
), args
, tmp
);
3332 gimplify_and_add (tmp
, pre_p
);
3334 addr
= fold_convert (build_pointer_type (type
), addr
);
3336 if (pass_by_reference_p
)
3337 addr
= build_va_arg_indirect_ref (addr
);
3339 return build_va_arg_indirect_ref (addr
);
3342 /* Save parameter registers starting with the register that corresponds
3343 to the first unnamed parameters. If the first unnamed parameter is
3344 in the stack then save no registers. Set pretend_args_size to the
3345 amount of space needed to save the registers. */
3347 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
3348 tree type
, int *pretend_size
, int no_rtl
)
3357 /* cum currently points to the last named argument, we want to
3358 start at the next argument. */
3359 FUNCTION_ARG_ADVANCE (ncum
, mode
, type
, 1);
3361 offset
= -STACK_POINTER_OFFSET
;
3362 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
3364 tmp
= gen_frame_mem (V4SImode
,
3365 plus_constant (virtual_incoming_args_rtx
,
3367 emit_move_insn (tmp
,
3368 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
3371 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
3376 spu_conditional_register_usage (void)
3380 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
3381 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
3385 /* This is called to decide when we can simplify a load instruction. We
3386 must only return true for registers which we know will always be
3387 aligned. Taking into account that CSE might replace this reg with
3388 another one that has not been marked aligned.
3389 So this is really only true for frame, stack and virtual registers,
3390 which we know are always aligned and should not be adversely effected
3393 regno_aligned_for_load (int regno
)
3395 return regno
== FRAME_POINTER_REGNUM
3396 || (frame_pointer_needed
&& regno
== HARD_FRAME_POINTER_REGNUM
)
3397 || regno
== STACK_POINTER_REGNUM
3398 || (regno
>= FIRST_VIRTUAL_REGISTER
3399 && regno
<= LAST_VIRTUAL_REGISTER
);
3402 /* Return TRUE when mem is known to be 16-byte aligned. */
3404 aligned_mem_p (rtx mem
)
3406 if (MEM_ALIGN (mem
) >= 128)
3408 if (GET_MODE_SIZE (GET_MODE (mem
)) >= 16)
3410 if (GET_CODE (XEXP (mem
, 0)) == PLUS
)
3412 rtx p0
= XEXP (XEXP (mem
, 0), 0);
3413 rtx p1
= XEXP (XEXP (mem
, 0), 1);
3414 if (regno_aligned_for_load (REGNO (p0
)))
3416 if (GET_CODE (p1
) == REG
&& regno_aligned_for_load (REGNO (p1
)))
3418 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
3422 else if (GET_CODE (XEXP (mem
, 0)) == REG
)
3424 if (regno_aligned_for_load (REGNO (XEXP (mem
, 0))))
3427 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem
, 0)))
3429 else if (GET_CODE (XEXP (mem
, 0)) == CONST
)
3431 rtx p0
= XEXP (XEXP (XEXP (mem
, 0), 0), 0);
3432 rtx p1
= XEXP (XEXP (XEXP (mem
, 0), 0), 1);
3433 if (GET_CODE (p0
) == SYMBOL_REF
3434 && GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
3440 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3441 into its SYMBOL_REF_FLAGS. */
3443 spu_encode_section_info (tree decl
, rtx rtl
, int first
)
3445 default_encode_section_info (decl
, rtl
, first
);
3447 /* If a variable has a forced alignment to < 16 bytes, mark it with
3448 SYMBOL_FLAG_ALIGN1. */
3449 if (TREE_CODE (decl
) == VAR_DECL
3450 && DECL_USER_ALIGN (decl
) && DECL_ALIGN (decl
) < 128)
3451 SYMBOL_REF_FLAGS (XEXP (rtl
, 0)) |= SYMBOL_FLAG_ALIGN1
;
3454 /* Return TRUE if we are certain the mem refers to a complete object
3455 which is both 16-byte aligned and padded to a 16-byte boundary. This
3456 would make it safe to store with a single instruction.
3457 We guarantee the alignment and padding for static objects by aligning
3458 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3459 FIXME: We currently cannot guarantee this for objects on the stack
3460 because assign_parm_setup_stack calls assign_stack_local with the
3461 alignment of the parameter mode and in that case the alignment never
3462 gets adjusted by LOCAL_ALIGNMENT. */
3464 store_with_one_insn_p (rtx mem
)
3466 rtx addr
= XEXP (mem
, 0);
3467 if (GET_MODE (mem
) == BLKmode
)
3469 /* Only static objects. */
3470 if (GET_CODE (addr
) == SYMBOL_REF
)
3472 /* We use the associated declaration to make sure the access is
3473 referring to the whole object.
3474 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
3475 if it is necessary. Will there be cases where one exists, and
3476 the other does not? Will there be cases where both exist, but
3477 have different types? */
3478 tree decl
= MEM_EXPR (mem
);
3480 && TREE_CODE (decl
) == VAR_DECL
3481 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
3483 decl
= SYMBOL_REF_DECL (addr
);
3485 && TREE_CODE (decl
) == VAR_DECL
3486 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
3493 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
3495 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
3498 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
3500 rtx from
= SUBREG_REG (ops
[1]);
3501 enum machine_mode imode
= GET_MODE (from
);
3503 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
3504 && GET_MODE_CLASS (imode
) == MODE_INT
3505 && subreg_lowpart_p (ops
[1]));
3507 if (GET_MODE_SIZE (imode
) < 4)
3509 from
= gen_rtx_SUBREG (SImode
, from
, 0);
3513 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
3515 enum insn_code icode
= convert_optab_handler (trunc_optab
, mode
, imode
)->insn_code
;
3516 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
3519 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
3523 /* At least one of the operands needs to be a register. */
3524 if ((reload_in_progress
| reload_completed
) == 0
3525 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
3527 rtx temp
= force_reg (mode
, ops
[1]);
3528 emit_move_insn (ops
[0], temp
);
3531 if (reload_in_progress
|| reload_completed
)
3533 if (CONSTANT_P (ops
[1]))
3534 return spu_split_immediate (ops
);
3539 if (GET_CODE (ops
[0]) == MEM
)
3541 if (!spu_valid_move (ops
))
3543 emit_insn (gen_store (ops
[0], ops
[1], gen_reg_rtx (TImode
),
3544 gen_reg_rtx (TImode
)));
3548 else if (GET_CODE (ops
[1]) == MEM
)
3550 if (!spu_valid_move (ops
))
3553 (ops
[0], ops
[1], gen_reg_rtx (TImode
),
3554 gen_reg_rtx (SImode
)));
3558 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3560 if (GET_CODE (ops
[1]) == CONST_INT
)
3562 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
3563 if (val
!= INTVAL (ops
[1]))
3565 emit_move_insn (ops
[0], GEN_INT (val
));
3576 /* For now, only frame registers are known to be aligned at all times.
3577 We can't trust REGNO_POINTER_ALIGN because optimization will move
3578 registers around, potentially changing an "aligned" register in an
3579 address to an unaligned register, which would result in an invalid
3581 int regno
= REGNO (reg
);
3582 return REGNO_PTR_FRAME_P (regno
) ? REGNO_POINTER_ALIGN (regno
) : 1;
3586 spu_split_load (rtx
* ops
)
3588 enum machine_mode mode
= GET_MODE (ops
[0]);
3589 rtx addr
, load
, rot
, mem
, p0
, p1
;
3592 addr
= XEXP (ops
[1], 0);
3596 if (GET_CODE (addr
) == PLUS
)
3599 aligned reg + aligned reg => lqx
3600 aligned reg + unaligned reg => lqx, rotqby
3601 aligned reg + aligned const => lqd
3602 aligned reg + unaligned const => lqd, rotqbyi
3603 unaligned reg + aligned reg => lqx, rotqby
3604 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3605 unaligned reg + aligned const => lqd, rotqby
3606 unaligned reg + unaligned const -> not allowed by legitimate address
3608 p0
= XEXP (addr
, 0);
3609 p1
= XEXP (addr
, 1);
3610 if (reg_align (p0
) < 128)
3612 if (GET_CODE (p1
) == REG
&& reg_align (p1
) < 128)
3614 emit_insn (gen_addsi3 (ops
[3], p0
, p1
));
3622 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
3624 rot_amt
= INTVAL (p1
) & 15;
3625 p1
= GEN_INT (INTVAL (p1
) & -16);
3626 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
3628 else if (GET_CODE (p1
) == REG
&& reg_align (p1
) < 128)
3632 else if (GET_CODE (addr
) == REG
)
3634 if (reg_align (addr
) < 128)
3637 else if (GET_CODE (addr
) == CONST
)
3639 if (GET_CODE (XEXP (addr
, 0)) == PLUS
3640 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
3641 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
3643 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
3645 addr
= gen_rtx_CONST (Pmode
,
3646 gen_rtx_PLUS (Pmode
,
3647 XEXP (XEXP (addr
, 0), 0),
3648 GEN_INT (rot_amt
& -16)));
3650 addr
= XEXP (XEXP (addr
, 0), 0);
3655 else if (GET_CODE (addr
) == CONST_INT
)
3657 rot_amt
= INTVAL (addr
);
3658 addr
= GEN_INT (rot_amt
& -16);
3660 else if (!ALIGNED_SYMBOL_REF_P (addr
))
3663 if (GET_MODE_SIZE (mode
) < 4)
3664 rot_amt
+= GET_MODE_SIZE (mode
) - 4;
3670 emit_insn (gen_addsi3 (ops
[3], rot
, GEN_INT (rot_amt
)));
3677 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
3678 mem
= change_address (ops
[1], TImode
, addr
);
3680 emit_insn (gen_movti (load
, mem
));
3683 emit_insn (gen_rotqby_ti (load
, load
, rot
));
3685 emit_insn (gen_rotlti3 (load
, load
, GEN_INT (rot_amt
* 8)));
3687 if (reload_completed
)
3688 emit_move_insn (ops
[0], gen_rtx_REG (GET_MODE (ops
[0]), REGNO (load
)));
3690 emit_insn (gen_spu_convert (ops
[0], load
));
3694 spu_split_store (rtx
* ops
)
3696 enum machine_mode mode
= GET_MODE (ops
[0]);
3699 rtx addr
, p0
, p1
, p1_lo
, smem
;
3703 addr
= XEXP (ops
[0], 0);
3705 if (GET_CODE (addr
) == PLUS
)
3708 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3709 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3710 aligned reg + aligned const => lqd, c?d, shuf, stqx
3711 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3712 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3713 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3714 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3715 unaligned reg + unaligned const -> not allowed by legitimate address
3718 p0
= XEXP (addr
, 0);
3719 p1
= p1_lo
= XEXP (addr
, 1);
3720 if (GET_CODE (p0
) == REG
&& GET_CODE (p1
) == CONST_INT
)
3722 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
3723 p1
= GEN_INT (INTVAL (p1
) & -16);
3724 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
3727 else if (GET_CODE (addr
) == REG
)
3731 p1
= p1_lo
= const0_rtx
;
3736 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
3737 p1
= 0; /* aform doesn't use p1 */
3739 if (ALIGNED_SYMBOL_REF_P (addr
))
3741 else if (GET_CODE (addr
) == CONST
)
3743 if (GET_CODE (XEXP (addr
, 0)) == PLUS
3744 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
3745 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
3747 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
3749 addr
= gen_rtx_CONST (Pmode
,
3750 gen_rtx_PLUS (Pmode
,
3751 XEXP (XEXP (addr
, 0), 0),
3752 GEN_INT (v
& -16)));
3754 addr
= XEXP (XEXP (addr
, 0), 0);
3755 p1_lo
= GEN_INT (v
& 15);
3758 else if (GET_CODE (addr
) == CONST_INT
)
3760 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
3761 addr
= GEN_INT (INTVAL (addr
) & -16);
3765 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
3767 scalar
= store_with_one_insn_p (ops
[0]);
3770 /* We could copy the flags from the ops[0] MEM to mem here,
3771 We don't because we want this load to be optimized away if
3772 possible, and copying the flags will prevent that in certain
3773 cases, e.g. consider the volatile flag. */
3775 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
3776 set_mem_alias_set (lmem
, 0);
3777 emit_insn (gen_movti (reg
, lmem
));
3779 if (!p0
|| reg_align (p0
) >= 128)
3780 p0
= stack_pointer_rtx
;
3784 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
3785 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
3787 else if (reload_completed
)
3789 if (GET_CODE (ops
[1]) == REG
)
3790 emit_move_insn (reg
, gen_rtx_REG (GET_MODE (reg
), REGNO (ops
[1])));
3791 else if (GET_CODE (ops
[1]) == SUBREG
)
3792 emit_move_insn (reg
,
3793 gen_rtx_REG (GET_MODE (reg
),
3794 REGNO (SUBREG_REG (ops
[1]))));
3800 if (GET_CODE (ops
[1]) == REG
)
3801 emit_insn (gen_spu_convert (reg
, ops
[1]));
3802 else if (GET_CODE (ops
[1]) == SUBREG
)
3803 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
3808 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
3809 emit_insn (gen_shlqby_ti
3810 (reg
, reg
, GEN_INT (4 - GET_MODE_SIZE (mode
))));
3812 smem
= change_address (ops
[0], TImode
, addr
);
3813 /* We can't use the previous alias set because the memory has changed
3814 size and can potentially overlap objects of other types. */
3815 set_mem_alias_set (smem
, 0);
3817 emit_insn (gen_movti (smem
, reg
));
3820 /* Return TRUE if X is MEM which is a struct member reference
3821 and the member can safely be loaded and stored with a single
3822 instruction because it is padded. */
3824 mem_is_padded_component_ref (rtx x
)
3826 tree t
= MEM_EXPR (x
);
3828 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
3830 t
= TREE_OPERAND (t
, 1);
3831 if (!t
|| TREE_CODE (t
) != FIELD_DECL
3832 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
3834 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3835 r
= DECL_FIELD_CONTEXT (t
);
3836 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
3838 /* Make sure they are the same mode */
3839 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
3841 /* If there are no following fields then the field alignment assures
3842 the structure is padded to the alignment which means this field is
3844 if (TREE_CHAIN (t
) == 0)
3846 /* If the following field is also aligned then this field will be
3849 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
3854 /* Parse the -mfixed-range= option string. */
3856 fix_range (const char *const_str
)
3859 char *str
, *dash
, *comma
;
3861 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
3862 REG2 are either register names or register numbers. The effect
3863 of this option is to mark the registers in the range from REG1 to
3864 REG2 as ``fixed'' so they won't be used by the compiler. */
3866 i
= strlen (const_str
);
3867 str
= (char *) alloca (i
+ 1);
3868 memcpy (str
, const_str
, i
+ 1);
3872 dash
= strchr (str
, '-');
3875 warning (0, "value of -mfixed-range must have form REG1-REG2");
3879 comma
= strchr (dash
+ 1, ',');
3883 first
= decode_reg_name (str
);
3886 warning (0, "unknown register name: %s", str
);
3890 last
= decode_reg_name (dash
+ 1);
3893 warning (0, "unknown register name: %s", dash
+ 1);
3901 warning (0, "%s-%s is an empty range", str
, dash
+ 1);
3905 for (i
= first
; i
<= last
; ++i
)
3906 fixed_regs
[i
] = call_used_regs
[i
] = 1;
3917 spu_valid_move (rtx
* ops
)
3919 enum machine_mode mode
= GET_MODE (ops
[0]);
3920 if (!register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
3923 /* init_expr_once tries to recog against load and store insns to set
3924 the direct_load[] and direct_store[] arrays. We always want to
3925 consider those loads and stores valid. init_expr_once is called in
3926 the context of a dummy function which does not have a decl. */
3927 if (cfun
->decl
== 0)
3930 /* Don't allows loads/stores which would require more than 1 insn.
3931 During and after reload we assume loads and stores only take 1
3933 if (GET_MODE_SIZE (mode
) < 16 && !reload_in_progress
&& !reload_completed
)
3935 if (GET_CODE (ops
[0]) == MEM
3936 && (GET_MODE_SIZE (mode
) < 4
3937 || !(store_with_one_insn_p (ops
[0])
3938 || mem_is_padded_component_ref (ops
[0]))))
3940 if (GET_CODE (ops
[1]) == MEM
3941 && (GET_MODE_SIZE (mode
) < 4 || !aligned_mem_p (ops
[1])))
3947 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3948 can be generated using the fsmbi instruction. */
3950 fsmbi_const_p (rtx x
)
3954 /* We can always choose TImode for CONST_INT because the high bits
3955 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3956 enum immediate_class c
= classify_immediate (x
, TImode
);
3957 return c
== IC_FSMBI
|| (!epilogue_completed
&& c
== IC_FSMBI2
);
3962 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3963 can be generated using the cbd, chd, cwd or cdd instruction. */
3965 cpat_const_p (rtx x
, enum machine_mode mode
)
3969 enum immediate_class c
= classify_immediate (x
, mode
);
3970 return c
== IC_CPAT
;
3976 gen_cpat_const (rtx
* ops
)
3978 unsigned char dst
[16];
3979 int i
, offset
, shift
, isize
;
3980 if (GET_CODE (ops
[3]) != CONST_INT
3981 || GET_CODE (ops
[2]) != CONST_INT
3982 || (GET_CODE (ops
[1]) != CONST_INT
3983 && GET_CODE (ops
[1]) != REG
))
3985 if (GET_CODE (ops
[1]) == REG
3986 && (!REG_POINTER (ops
[1])
3987 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops
[1])) < 128))
3990 for (i
= 0; i
< 16; i
++)
3992 isize
= INTVAL (ops
[3]);
3995 else if (isize
== 2)
3999 offset
= (INTVAL (ops
[2]) +
4000 (GET_CODE (ops
[1]) ==
4001 CONST_INT
? INTVAL (ops
[1]) : 0)) & 15;
4002 for (i
= 0; i
< isize
; i
++)
4003 dst
[offset
+ i
] = i
+ shift
;
4004 return array_to_constant (TImode
, dst
);
4007 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4008 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4009 than 16 bytes, the value is repeated across the rest of the array. */
4011 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
4016 memset (arr
, 0, 16);
4017 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
4018 if (GET_CODE (x
) == CONST_INT
4019 || (GET_CODE (x
) == CONST_DOUBLE
4020 && (mode
== SFmode
|| mode
== DFmode
)))
4022 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
4024 if (GET_CODE (x
) == CONST_DOUBLE
)
4025 val
= const_double_to_hwint (x
);
4028 first
= GET_MODE_SIZE (mode
) - 1;
4029 for (i
= first
; i
>= 0; i
--)
4031 arr
[i
] = val
& 0xff;
4034 /* Splat the constant across the whole array. */
4035 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
4038 j
= (j
== first
) ? 0 : j
+ 1;
4041 else if (GET_CODE (x
) == CONST_DOUBLE
)
4043 val
= CONST_DOUBLE_LOW (x
);
4044 for (i
= 15; i
>= 8; i
--)
4046 arr
[i
] = val
& 0xff;
4049 val
= CONST_DOUBLE_HIGH (x
);
4050 for (i
= 7; i
>= 0; i
--)
4052 arr
[i
] = val
& 0xff;
4056 else if (GET_CODE (x
) == CONST_VECTOR
)
4060 mode
= GET_MODE_INNER (mode
);
4061 units
= CONST_VECTOR_NUNITS (x
);
4062 for (i
= 0; i
< units
; i
++)
4064 elt
= CONST_VECTOR_ELT (x
, i
);
4065 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
4067 if (GET_CODE (elt
) == CONST_DOUBLE
)
4068 val
= const_double_to_hwint (elt
);
4071 first
= GET_MODE_SIZE (mode
) - 1;
4072 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
4074 for (j
= first
; j
>= 0; j
--)
4076 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
4086 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4087 smaller than 16 bytes, use the bytes that would represent that value
4088 in a register, e.g., for QImode return the value of arr[3]. */
4090 array_to_constant (enum machine_mode mode
, unsigned char arr
[16])
4092 enum machine_mode inner_mode
;
4094 int units
, size
, i
, j
, k
;
4097 if (GET_MODE_CLASS (mode
) == MODE_INT
4098 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
4100 j
= GET_MODE_SIZE (mode
);
4101 i
= j
< 4 ? 4 - j
: 0;
4102 for (val
= 0; i
< j
; i
++)
4103 val
= (val
<< 8) | arr
[i
];
4104 val
= trunc_int_for_mode (val
, mode
);
4105 return GEN_INT (val
);
4111 for (i
= high
= 0; i
< 8; i
++)
4112 high
= (high
<< 8) | arr
[i
];
4113 for (i
= 8, val
= 0; i
< 16; i
++)
4114 val
= (val
<< 8) | arr
[i
];
4115 return immed_double_const (val
, high
, TImode
);
4119 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
4120 val
= trunc_int_for_mode (val
, SImode
);
4121 return hwint_to_const_double (SFmode
, val
);
4125 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
4127 val
|= (arr
[4] << 24) | (arr
[5] << 16) | (arr
[6] << 8) | arr
[7];
4128 return hwint_to_const_double (DFmode
, val
);
4131 if (!VECTOR_MODE_P (mode
))
4134 units
= GET_MODE_NUNITS (mode
);
4135 size
= GET_MODE_UNIT_SIZE (mode
);
4136 inner_mode
= GET_MODE_INNER (mode
);
4137 v
= rtvec_alloc (units
);
4139 for (k
= i
= 0; i
< units
; ++i
)
4142 for (j
= 0; j
< size
; j
++, k
++)
4143 val
= (val
<< 8) | arr
[k
];
4145 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
4146 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
4148 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
4153 return gen_rtx_CONST_VECTOR (mode
, v
);
4157 reloc_diagnostic (rtx x
)
4159 tree loc_decl
, decl
= 0;
4161 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
4164 if (GET_CODE (x
) == SYMBOL_REF
)
4165 decl
= SYMBOL_REF_DECL (x
);
4166 else if (GET_CODE (x
) == CONST
4167 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
4168 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
4170 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4171 if (decl
&& !DECL_P (decl
))
4174 /* We use last_assemble_variable_decl to get line information. It's
4175 not always going to be right and might not even be close, but will
4176 be right for the more common cases. */
4177 if (!last_assemble_variable_decl
|| in_section
== ctors_section
)
4180 loc_decl
= last_assemble_variable_decl
;
4182 /* The decl could be a string constant. */
4183 if (decl
&& DECL_P (decl
))
4184 msg
= "%Jcreating run-time relocation for %qD";
4186 msg
= "creating run-time relocation";
4188 if (TARGET_WARN_RELOC
)
4189 warning (0, msg
, loc_decl
, decl
);
4191 error (msg
, loc_decl
, decl
);
4194 /* Hook into assemble_integer so we can generate an error for run-time
4195 relocations. The SPU ABI disallows them. */
4197 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
4199 /* By default run-time relocations aren't supported, but we allow them
4200 in case users support it in their own run-time loader. And we provide
4201 a warning for those users that don't. */
4202 if ((GET_CODE (x
) == SYMBOL_REF
)
4203 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
4204 reloc_diagnostic (x
);
4206 return default_assemble_integer (x
, size
, aligned_p
);
4210 spu_asm_globalize_label (FILE * file
, const char *name
)
4212 fputs ("\t.global\t", file
);
4213 assemble_name (file
, name
);
4218 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
)
4220 enum machine_mode mode
= GET_MODE (x
);
4221 int cost
= COSTS_N_INSNS (2);
4223 /* Folding to a CONST_VECTOR will use extra space but there might
4224 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4225 only if it allows us to fold away multiple insns. Changing the cost
4226 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4227 because this cost will only be compared against a single insn.
4228 if (code == CONST_VECTOR)
4229 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4232 /* Use defaults for float operations. Not accurate but good enough. */
4235 *total
= COSTS_N_INSNS (13);
4240 *total
= COSTS_N_INSNS (6);
4246 if (satisfies_constraint_K (x
))
4248 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
4249 *total
= COSTS_N_INSNS (1);
4251 *total
= COSTS_N_INSNS (3);
4255 *total
= COSTS_N_INSNS (3);
4260 *total
= COSTS_N_INSNS (0);
4264 *total
= COSTS_N_INSNS (5);
4268 case FLOAT_TRUNCATE
:
4270 case UNSIGNED_FLOAT
:
4273 *total
= COSTS_N_INSNS (7);
4279 *total
= COSTS_N_INSNS (9);
4286 GET_CODE (XEXP (x
, 0)) ==
4287 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4288 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
4290 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
4292 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
4293 cost
= COSTS_N_INSNS (14);
4294 if ((val
& 0xffff) == 0)
4295 cost
= COSTS_N_INSNS (9);
4296 else if (val
> 0 && val
< 0x10000)
4297 cost
= COSTS_N_INSNS (11);
4306 *total
= COSTS_N_INSNS (20);
4313 *total
= COSTS_N_INSNS (4);
4316 if (XINT (x
, 1) == UNSPEC_CONVERT
)
4317 *total
= COSTS_N_INSNS (0);
4319 *total
= COSTS_N_INSNS (4);
4322 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4323 if (GET_MODE_CLASS (mode
) == MODE_INT
4324 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
4325 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
4326 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
4332 spu_eh_return_filter_mode (void)
4334 /* We would like this to be SImode, but sjlj exceptions seems to work
4335 only with word_mode. */
4339 /* Decide whether we can make a sibling call to a function. DECL is the
4340 declaration of the function being targeted by the call and EXP is the
4341 CALL_EXPR representing the call. */
4343 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
4345 return decl
&& !TARGET_LARGE_MEM
;
4348 /* We need to correctly update the back chain pointer and the Available
4349 Stack Size (which is in the second slot of the sp register.) */
4351 spu_allocate_stack (rtx op0
, rtx op1
)
4354 rtx chain
= gen_reg_rtx (V4SImode
);
4355 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
4356 rtx sp
= gen_reg_rtx (V4SImode
);
4357 rtx splatted
= gen_reg_rtx (V4SImode
);
4358 rtx pat
= gen_reg_rtx (TImode
);
4360 /* copy the back chain so we can save it back again. */
4361 emit_move_insn (chain
, stack_bot
);
4363 op1
= force_reg (SImode
, op1
);
4365 v
= 0x1020300010203ll
;
4366 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
4367 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
4369 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
4370 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
4372 if (flag_stack_check
)
4374 rtx avail
= gen_reg_rtx(SImode
);
4375 rtx result
= gen_reg_rtx(SImode
);
4376 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
4377 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
4378 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
4381 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
4383 emit_move_insn (stack_bot
, chain
);
4385 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
4389 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
4391 static unsigned char arr
[16] =
4392 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4393 rtx temp
= gen_reg_rtx (SImode
);
4394 rtx temp2
= gen_reg_rtx (SImode
);
4395 rtx temp3
= gen_reg_rtx (V4SImode
);
4396 rtx temp4
= gen_reg_rtx (V4SImode
);
4397 rtx pat
= gen_reg_rtx (TImode
);
4398 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
4400 /* Restore the backchain from the first word, sp from the second. */
4401 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
4402 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
4404 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
4406 /* Compute Available Stack Size for sp */
4407 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
4408 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
4410 /* Compute Available Stack Size for back chain */
4411 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
4412 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
4413 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
4415 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
4416 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
4420 spu_init_libfuncs (void)
4422 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
4423 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
4424 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
4425 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
4426 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
4427 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
4428 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
4429 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
4430 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
4431 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
4432 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
4434 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
4435 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
4438 /* Make a subreg, stripping any existing subreg. We could possibly just
4439 call simplify_subreg, but in this case we know what we want. */
4441 spu_gen_subreg (enum machine_mode mode
, rtx x
)
4443 if (GET_CODE (x
) == SUBREG
)
4445 if (GET_MODE (x
) == mode
)
4447 return gen_rtx_SUBREG (mode
, x
, 0);
4451 spu_return_in_memory (const_tree type
, const_tree fntype ATTRIBUTE_UNUSED
)
4453 return (TYPE_MODE (type
) == BLKmode
4455 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
4456 || int_size_in_bytes (type
) >
4457 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
4460 /* Create the built-in types and functions */
4462 struct spu_builtin_description spu_builtins
[] = {
4463 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4464 {fcode, icode, name, type, params, NULL_TREE},
4465 #include "spu-builtins.def"
4470 spu_init_builtins (void)
4472 struct spu_builtin_description
*d
;
4475 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
4476 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
4477 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
4478 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
4479 V4SF_type_node
= build_vector_type (float_type_node
, 4);
4480 V2DF_type_node
= build_vector_type (double_type_node
, 2);
4482 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
4483 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
4484 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
4485 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
4487 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
4489 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
4490 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
4491 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
4492 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
4493 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
4494 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
4495 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
4496 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
4497 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
4498 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
4499 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
4500 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
4502 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
4503 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
4504 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
4505 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
4506 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
4507 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
4508 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
4509 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
4511 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
4512 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
4514 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
4516 spu_builtin_types
[SPU_BTI_PTR
] =
4517 build_pointer_type (build_qualified_type
4519 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
4521 /* For each builtin we build a new prototype. The tree code will make
4522 sure nodes are shared. */
4523 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
4526 char name
[64]; /* build_function will make a copy. */
4532 /* find last parm */
4533 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
4539 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
4541 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
4543 sprintf (name
, "__builtin_%s", d
->name
);
4545 add_builtin_function (name
, p
, END_BUILTINS
+ i
, BUILT_IN_MD
,
4547 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
4548 TREE_READONLY (d
->fndecl
) = 1;
4553 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
4555 static unsigned char arr
[16] =
4556 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4558 rtx temp
= gen_reg_rtx (Pmode
);
4559 rtx temp2
= gen_reg_rtx (V4SImode
);
4560 rtx temp3
= gen_reg_rtx (V4SImode
);
4561 rtx pat
= gen_reg_rtx (TImode
);
4562 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
4564 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
4566 /* Restore the sp. */
4567 emit_move_insn (temp
, op1
);
4568 emit_move_insn (temp2
, gen_frame_mem (V4SImode
, stack_pointer_rtx
));
4570 /* Compute available stack size for sp. */
4571 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
4572 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
4574 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
4575 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp2
);
4579 spu_safe_dma (HOST_WIDE_INT channel
)
4581 return (channel
>= 21 && channel
<= 27);
4585 spu_builtin_splats (rtx ops
[])
4587 enum machine_mode mode
= GET_MODE (ops
[0]);
4588 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
4590 unsigned char arr
[16];
4591 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
4592 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
4594 else if (!flag_pic
&& GET_MODE (ops
[0]) == V4SImode
&& CONSTANT_P (ops
[1]))
4596 rtvec v
= rtvec_alloc (4);
4597 RTVEC_ELT (v
, 0) = ops
[1];
4598 RTVEC_ELT (v
, 1) = ops
[1];
4599 RTVEC_ELT (v
, 2) = ops
[1];
4600 RTVEC_ELT (v
, 3) = ops
[1];
4601 emit_move_insn (ops
[0], gen_rtx_CONST_VECTOR (mode
, v
));
4605 rtx reg
= gen_reg_rtx (TImode
);
4607 if (GET_CODE (ops
[1]) != REG
4608 && GET_CODE (ops
[1]) != SUBREG
)
4609 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
4615 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
4621 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
4626 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
4631 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
4637 emit_move_insn (reg
, shuf
);
4638 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
4643 spu_builtin_extract (rtx ops
[])
4645 enum machine_mode mode
;
4648 mode
= GET_MODE (ops
[1]);
4650 if (GET_CODE (ops
[2]) == CONST_INT
)
4655 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
4658 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
4661 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
4664 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
4667 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
4670 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
4678 from
= spu_gen_subreg (TImode
, ops
[1]);
4679 rot
= gen_reg_rtx (TImode
);
4680 tmp
= gen_reg_rtx (SImode
);
4685 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
4688 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
4689 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
4693 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
4697 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
4702 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
4704 emit_insn (gen_spu_convert (ops
[0], rot
));
4708 spu_builtin_insert (rtx ops
[])
4710 enum machine_mode mode
= GET_MODE (ops
[0]);
4711 enum machine_mode imode
= GET_MODE_INNER (mode
);
4712 rtx mask
= gen_reg_rtx (TImode
);
4715 if (GET_CODE (ops
[3]) == CONST_INT
)
4716 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
4719 offset
= gen_reg_rtx (SImode
);
4720 emit_insn (gen_mulsi3
4721 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
4724 (mask
, stack_pointer_rtx
, offset
,
4725 GEN_INT (GET_MODE_SIZE (imode
))));
4726 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
4730 spu_builtin_promote (rtx ops
[])
4732 enum machine_mode mode
, imode
;
4733 rtx rot
, from
, offset
;
4736 mode
= GET_MODE (ops
[0]);
4737 imode
= GET_MODE_INNER (mode
);
4739 from
= gen_reg_rtx (TImode
);
4740 rot
= spu_gen_subreg (TImode
, ops
[0]);
4742 emit_insn (gen_spu_convert (from
, ops
[1]));
4744 if (GET_CODE (ops
[2]) == CONST_INT
)
4746 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
4747 if (GET_MODE_SIZE (imode
) < 4)
4748 pos
+= 4 - GET_MODE_SIZE (imode
);
4749 offset
= GEN_INT (pos
& 15);
4753 offset
= gen_reg_rtx (SImode
);
4757 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
4760 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
4761 emit_insn (gen_addsi3 (offset
, offset
, offset
));
4765 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
4766 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
4770 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
4776 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
4780 spu_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
4782 rtx shuf
= gen_reg_rtx (V4SImode
);
4783 rtx insn
= gen_reg_rtx (V4SImode
);
4788 fnaddr
= force_reg (SImode
, fnaddr
);
4789 cxt
= force_reg (SImode
, cxt
);
4791 if (TARGET_LARGE_MEM
)
4793 rtx rotl
= gen_reg_rtx (V4SImode
);
4794 rtx mask
= gen_reg_rtx (V4SImode
);
4795 rtx bi
= gen_reg_rtx (SImode
);
4796 unsigned char shufa
[16] = {
4797 2, 3, 0, 1, 18, 19, 16, 17,
4798 0, 1, 2, 3, 16, 17, 18, 19
4800 unsigned char insna
[16] = {
4802 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
4804 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4807 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
4808 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
4810 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
4811 emit_insn (gen_rotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
4812 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
4813 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
4815 mem
= memory_address (Pmode
, tramp
);
4816 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
4818 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
4819 mem
= memory_address (Pmode
, plus_constant (tramp
, 16));
4820 emit_move_insn (gen_rtx_MEM (Pmode
, mem
), bi
);
4824 rtx scxt
= gen_reg_rtx (SImode
);
4825 rtx sfnaddr
= gen_reg_rtx (SImode
);
4826 unsigned char insna
[16] = {
4827 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
4833 shufc
= gen_reg_rtx (TImode
);
4834 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
4836 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4837 fits 18 bits and the last 4 are zeros. This will be true if
4838 the stack pointer is initialized to 0x3fff0 at program start,
4839 otherwise the ila instruction will be garbage. */
4841 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
4842 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
4844 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
4845 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
4846 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
4848 mem
= memory_address (Pmode
, tramp
);
4849 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
4852 emit_insn (gen_sync ());
4856 spu_expand_sign_extend (rtx ops
[])
4858 unsigned char arr
[16];
4859 rtx pat
= gen_reg_rtx (TImode
);
4862 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
4863 if (GET_MODE (ops
[1]) == QImode
)
4865 sign
= gen_reg_rtx (HImode
);
4866 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
4867 for (i
= 0; i
< 16; i
++)
4873 for (i
= 0; i
< 16; i
++)
4875 switch (GET_MODE (ops
[1]))
4878 sign
= gen_reg_rtx (SImode
);
4879 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
4881 arr
[last
- 1] = 0x02;
4884 sign
= gen_reg_rtx (SImode
);
4885 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
4886 for (i
= 0; i
< 4; i
++)
4887 arr
[last
- i
] = 3 - i
;
4890 sign
= gen_reg_rtx (SImode
);
4891 c
= gen_reg_rtx (SImode
);
4892 emit_insn (gen_spu_convert (c
, ops
[1]));
4893 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
4894 for (i
= 0; i
< 8; i
++)
4895 arr
[last
- i
] = 7 - i
;
4901 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
4902 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
4905 /* expand vector initialization. If there are any constant parts,
4906 load constant parts first. Then load any non-constant parts. */
4908 spu_expand_vector_init (rtx target
, rtx vals
)
4910 enum machine_mode mode
= GET_MODE (target
);
4911 int n_elts
= GET_MODE_NUNITS (mode
);
4913 bool all_same
= true;
4914 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
4917 first
= XVECEXP (vals
, 0, 0);
4918 for (i
= 0; i
< n_elts
; ++i
)
4920 x
= XVECEXP (vals
, 0, i
);
4921 if (!CONSTANT_P (x
))
4925 if (first_constant
== NULL_RTX
)
4928 if (i
> 0 && !rtx_equal_p (x
, first
))
4932 /* if all elements are the same, use splats to repeat elements */
4935 if (!CONSTANT_P (first
)
4936 && !register_operand (first
, GET_MODE (x
)))
4937 first
= force_reg (GET_MODE (first
), first
);
4938 emit_insn (gen_spu_splats (target
, first
));
4942 /* load constant parts */
4943 if (n_var
!= n_elts
)
4947 emit_move_insn (target
,
4948 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
4952 rtx constant_parts_rtx
= copy_rtx (vals
);
4954 gcc_assert (first_constant
!= NULL_RTX
);
4955 /* fill empty slots with the first constant, this increases
4956 our chance of using splats in the recursive call below. */
4957 for (i
= 0; i
< n_elts
; ++i
)
4958 if (!CONSTANT_P (XVECEXP (constant_parts_rtx
, 0, i
)))
4959 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
4961 spu_expand_vector_init (target
, constant_parts_rtx
);
4965 /* load variable parts */
4968 rtx insert_operands
[4];
4970 insert_operands
[0] = target
;
4971 insert_operands
[2] = target
;
4972 for (i
= 0; i
< n_elts
; ++i
)
4974 x
= XVECEXP (vals
, 0, i
);
4975 if (!CONSTANT_P (x
))
4977 if (!register_operand (x
, GET_MODE (x
)))
4978 x
= force_reg (GET_MODE (x
), x
);
4979 insert_operands
[1] = x
;
4980 insert_operands
[3] = GEN_INT (i
);
4981 spu_builtin_insert (insert_operands
);
4987 /* Return insn index for the vector compare instruction for given CODE,
4988 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
4991 get_vec_cmp_insn (enum rtx_code code
,
4992 enum machine_mode dest_mode
,
4993 enum machine_mode op_mode
)
4999 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5000 return CODE_FOR_ceq_v16qi
;
5001 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5002 return CODE_FOR_ceq_v8hi
;
5003 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5004 return CODE_FOR_ceq_v4si
;
5005 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5006 return CODE_FOR_ceq_v4sf
;
5007 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5008 return CODE_FOR_ceq_v2df
;
5011 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5012 return CODE_FOR_cgt_v16qi
;
5013 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5014 return CODE_FOR_cgt_v8hi
;
5015 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5016 return CODE_FOR_cgt_v4si
;
5017 if (dest_mode
== V4SImode
&& op_mode
== V4SFmode
)
5018 return CODE_FOR_cgt_v4sf
;
5019 if (dest_mode
== V2DImode
&& op_mode
== V2DFmode
)
5020 return CODE_FOR_cgt_v2df
;
5023 if (dest_mode
== V16QImode
&& op_mode
== V16QImode
)
5024 return CODE_FOR_clgt_v16qi
;
5025 if (dest_mode
== V8HImode
&& op_mode
== V8HImode
)
5026 return CODE_FOR_clgt_v8hi
;
5027 if (dest_mode
== V4SImode
&& op_mode
== V4SImode
)
5028 return CODE_FOR_clgt_v4si
;
5036 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5037 DMODE is expected destination mode. This is a recursive function. */
5040 spu_emit_vector_compare (enum rtx_code rcode
,
5042 enum machine_mode dmode
)
5046 enum machine_mode dest_mode
;
5047 enum machine_mode op_mode
= GET_MODE (op1
);
5049 gcc_assert (GET_MODE (op0
) == GET_MODE (op1
));
5051 /* Floating point vector compare instructions uses destination V4SImode.
5052 Double floating point vector compare instructions uses destination V2DImode.
5053 Move destination to appropriate mode later. */
5054 if (dmode
== V4SFmode
)
5055 dest_mode
= V4SImode
;
5056 else if (dmode
== V2DFmode
)
5057 dest_mode
= V2DImode
;
5061 mask
= gen_reg_rtx (dest_mode
);
5062 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5064 if (vec_cmp_insn
== -1)
5066 bool swap_operands
= false;
5067 bool try_again
= false;
5072 swap_operands
= true;
5077 swap_operands
= true;
5081 /* Treat A != B as ~(A==B). */
5083 enum insn_code nor_code
;
5084 rtx eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5085 nor_code
= optab_handler (one_cmpl_optab
, (int)dest_mode
)->insn_code
;
5086 gcc_assert (nor_code
!= CODE_FOR_nothing
);
5087 emit_insn (GEN_FCN (nor_code
) (mask
, eq_rtx
));
5088 if (dmode
!= dest_mode
)
5090 rtx temp
= gen_reg_rtx (dest_mode
);
5091 convert_move (temp
, mask
, 0);
5101 /* Try GT/GTU/LT/LTU OR EQ */
5104 enum insn_code ior_code
;
5105 enum rtx_code new_code
;
5109 case GE
: new_code
= GT
; break;
5110 case GEU
: new_code
= GTU
; break;
5111 case LE
: new_code
= LT
; break;
5112 case LEU
: new_code
= LTU
; break;
5117 c_rtx
= spu_emit_vector_compare (new_code
, op0
, op1
, dest_mode
);
5118 eq_rtx
= spu_emit_vector_compare (EQ
, op0
, op1
, dest_mode
);
5120 ior_code
= optab_handler (ior_optab
, (int)dest_mode
)->insn_code
;
5121 gcc_assert (ior_code
!= CODE_FOR_nothing
);
5122 emit_insn (GEN_FCN (ior_code
) (mask
, c_rtx
, eq_rtx
));
5123 if (dmode
!= dest_mode
)
5125 rtx temp
= gen_reg_rtx (dest_mode
);
5126 convert_move (temp
, mask
, 0);
5136 /* You only get two chances. */
5138 vec_cmp_insn
= get_vec_cmp_insn (rcode
, dest_mode
, op_mode
);
5140 gcc_assert (vec_cmp_insn
!= -1);
5151 emit_insn (GEN_FCN (vec_cmp_insn
) (mask
, op0
, op1
));
5152 if (dmode
!= dest_mode
)
5154 rtx temp
= gen_reg_rtx (dest_mode
);
5155 convert_move (temp
, mask
, 0);
5162 /* Emit vector conditional expression.
5163 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5164 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5167 spu_emit_vector_cond_expr (rtx dest
, rtx op1
, rtx op2
,
5168 rtx cond
, rtx cc_op0
, rtx cc_op1
)
5170 enum machine_mode dest_mode
= GET_MODE (dest
);
5171 enum rtx_code rcode
= GET_CODE (cond
);
5174 /* Get the vector mask for the given relational operations. */
5175 mask
= spu_emit_vector_compare (rcode
, cc_op0
, cc_op1
, dest_mode
);
5177 emit_insn(gen_selb (dest
, op2
, op1
, mask
));
5183 spu_force_reg (enum machine_mode mode
, rtx op
)
5186 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
5188 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
5189 || GET_MODE (op
) == BLKmode
)
5190 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
5194 r
= force_reg (GET_MODE (op
), op
);
5195 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
5197 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
5202 x
= gen_reg_rtx (mode
);
5203 emit_insn (gen_spu_convert (x
, r
));
5208 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
5210 HOST_WIDE_INT v
= 0;
5212 /* Check the range of immediate operands. */
5213 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
5215 int range
= p
- SPU_BTI_7
;
5217 if (!CONSTANT_P (op
))
5218 error ("%s expects an integer literal in the range [%d, %d].",
5220 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
5222 if (GET_CODE (op
) == CONST
5223 && (GET_CODE (XEXP (op
, 0)) == PLUS
5224 || GET_CODE (XEXP (op
, 0)) == MINUS
))
5226 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
5227 op
= XEXP (XEXP (op
, 0), 0);
5229 else if (GET_CODE (op
) == CONST_INT
)
5231 else if (GET_CODE (op
) == CONST_VECTOR
5232 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == CONST_INT
)
5233 v
= INTVAL (CONST_VECTOR_ELT (op
, 0));
5235 /* The default for v is 0 which is valid in every range. */
5236 if (v
< spu_builtin_range
[range
].low
5237 || v
> spu_builtin_range
[range
].high
)
5238 error ("%s expects an integer literal in the range [%d, %d]. ("
5239 HOST_WIDE_INT_PRINT_DEC
")",
5241 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
,
5250 /* This is only used in lqa, and stqa. Even though the insns
5251 encode 16 bits of the address (all but the 2 least
5252 significant), only 14 bits are used because it is masked to
5253 be 16 byte aligned. */
5257 /* This is used for lqr and stqr. */
5264 if (GET_CODE (op
) == LABEL_REF
5265 || (GET_CODE (op
) == SYMBOL_REF
5266 && SYMBOL_REF_FUNCTION_P (op
))
5267 || (v
& ((1 << lsbits
) - 1)) != 0)
5268 warning (0, "%d least significant bits of %s are ignored.", lsbits
,
5275 expand_builtin_args (struct spu_builtin_description
*d
, tree exp
,
5276 rtx target
, rtx ops
[])
5278 enum insn_code icode
= d
->icode
;
5281 /* Expand the arguments into rtl. */
5283 if (d
->parm
[0] != SPU_BTI_VOID
)
5286 for (a
= 0; i
< insn_data
[icode
].n_operands
; i
++, a
++)
5288 tree arg
= CALL_EXPR_ARG (exp
, a
);
5291 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, 0);
5296 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
5297 tree exp
, rtx target
)
5301 enum insn_code icode
= d
->icode
;
5302 enum machine_mode mode
, tmode
;
5306 /* Set up ops[] with values from arglist. */
5307 expand_builtin_args (d
, exp
, target
, ops
);
5309 /* Handle the target operand which must be operand 0. */
5311 if (d
->parm
[0] != SPU_BTI_VOID
)
5314 /* We prefer the mode specified for the match_operand otherwise
5315 use the mode from the builtin function prototype. */
5316 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
5317 if (tmode
== VOIDmode
)
5318 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
5320 /* Try to use target because not using it can lead to extra copies
5321 and when we are using all of the registers extra copies leads
5323 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
5326 target
= ops
[0] = gen_reg_rtx (tmode
);
5328 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
5334 if (d
->fcode
== SPU_MASK_FOR_LOAD
)
5336 enum machine_mode mode
= insn_data
[icode
].operand
[1].mode
;
5341 arg
= CALL_EXPR_ARG (exp
, 0);
5342 gcc_assert (TREE_CODE (TREE_TYPE (arg
)) == POINTER_TYPE
);
5343 op
= expand_expr (arg
, NULL_RTX
, Pmode
, EXPAND_NORMAL
);
5344 addr
= memory_address (mode
, op
);
5347 op
= gen_reg_rtx (GET_MODE (addr
));
5348 emit_insn (gen_rtx_SET (VOIDmode
, op
,
5349 gen_rtx_NEG (GET_MODE (addr
), addr
)));
5350 op
= gen_rtx_MEM (mode
, op
);
5352 pat
= GEN_FCN (icode
) (target
, op
);
5359 /* Ignore align_hint, but still expand it's args in case they have
5361 if (icode
== CODE_FOR_spu_align_hint
)
5364 /* Handle the rest of the operands. */
5365 for (p
= 1; i
< insn_data
[icode
].n_operands
; i
++, p
++)
5367 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
5368 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
5370 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
5372 /* mode can be VOIDmode here for labels */
5374 /* For specific intrinsics with an immediate operand, e.g.,
5375 si_ai(), we sometimes need to convert the scalar argument to a
5376 vector argument by splatting the scalar. */
5377 if (VECTOR_MODE_P (mode
)
5378 && (GET_CODE (ops
[i
]) == CONST_INT
5379 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
5380 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
5382 if (GET_CODE (ops
[i
]) == CONST_INT
)
5383 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
5386 rtx reg
= gen_reg_rtx (mode
);
5387 enum machine_mode imode
= GET_MODE_INNER (mode
);
5388 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
5389 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
5390 if (imode
!= GET_MODE (ops
[i
]))
5391 ops
[i
] = convert_to_mode (imode
, ops
[i
],
5392 TYPE_UNSIGNED (spu_builtin_types
5394 emit_insn (gen_spu_splats (reg
, ops
[i
]));
5399 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
5401 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
5402 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
5405 switch (insn_data
[icode
].n_operands
)
5408 pat
= GEN_FCN (icode
) (0);
5411 pat
= GEN_FCN (icode
) (ops
[0]);
5414 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
5417 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
5420 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
5423 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
5426 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
5435 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
5436 emit_call_insn (pat
);
5437 else if (d
->type
== B_JUMP
)
5439 emit_jump_insn (pat
);
5445 return_type
= spu_builtin_types
[d
->parm
[0]];
5446 if (d
->parm
[0] != SPU_BTI_VOID
5447 && GET_MODE (target
) != TYPE_MODE (return_type
))
5449 /* target is the return value. It should always be the mode of
5450 the builtin function prototype. */
5451 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
5458 spu_expand_builtin (tree exp
,
5460 rtx subtarget ATTRIBUTE_UNUSED
,
5461 enum machine_mode mode ATTRIBUTE_UNUSED
,
5462 int ignore ATTRIBUTE_UNUSED
)
5464 tree fndecl
= TREE_OPERAND (CALL_EXPR_FN (exp
), 0);
5465 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
) - END_BUILTINS
;
5466 struct spu_builtin_description
*d
;
5468 if (fcode
< NUM_SPU_BUILTINS
)
5470 d
= &spu_builtins
[fcode
];
5472 return spu_expand_builtin_1 (d
, exp
, target
);
5477 /* Implement targetm.vectorize.builtin_mul_widen_even. */
5479 spu_builtin_mul_widen_even (tree type
)
5481 switch (TYPE_MODE (type
))
5484 if (TYPE_UNSIGNED (type
))
5485 return spu_builtins
[SPU_MULE_0
].fndecl
;
5487 return spu_builtins
[SPU_MULE_1
].fndecl
;
5494 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
5496 spu_builtin_mul_widen_odd (tree type
)
5498 switch (TYPE_MODE (type
))
5501 if (TYPE_UNSIGNED (type
))
5502 return spu_builtins
[SPU_MULO_1
].fndecl
;
5504 return spu_builtins
[SPU_MULO_0
].fndecl
;
5511 /* Implement targetm.vectorize.builtin_mask_for_load. */
5513 spu_builtin_mask_for_load (void)
5515 struct spu_builtin_description
*d
= &spu_builtins
[SPU_MASK_FOR_LOAD
];
/* Implement targetm.vectorize.builtin_vectorization_cost.  */
static int
spu_builtin_vectorization_cost (bool runtime_test)
{
  /* If the branch of the runtime test is taken - i.e. - the vectorized
     version is skipped - this incurs a misprediction cost (because the
     vectorized version is expected to be the fall-through).  So we subtract
     the latency of a mispredicted branch from the costs that are incurred
     when the vectorized version is executed.  */
  if (runtime_test)
    return -19;

  return 0;
}
5535 /* Return true iff, data reference of TYPE can reach vector alignment (16)
5536 after applying N number of iterations. This routine does not determine
5537 how may iterations are required to reach desired alignment. */
5540 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED
, bool is_packed
)
5545 /* All other types are naturally aligned. */
5549 /* Count the total number of instructions in each pipe and return the
5550 maximum, which is used as the Minimum Iteration Interval (MII)
5551 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
5552 -2 are instructions that can go in pipe0 or pipe1. */
5554 spu_sms_res_mii (struct ddg
*g
)
5557 unsigned t
[4] = {0, 0, 0, 0};
5559 for (i
= 0; i
< g
->num_nodes
; i
++)
5561 rtx insn
= g
->nodes
[i
].insn
;
5562 int p
= get_pipe (insn
) + 2;
5568 if (dump_file
&& INSN_P (insn
))
5569 fprintf (dump_file
, "i%d %s %d %d\n",
5571 insn_data
[INSN_CODE(insn
)].name
,
5575 fprintf (dump_file
, "%d %d %d %d\n", t
[0], t
[1], t
[2], t
[3]);
5577 return MAX ((t
[0] + t
[2] + t
[3] + 1) / 2, MAX (t
[2], t
[3]));
5582 spu_init_expanders (void)
5584 /* HARD_FRAME_REGISTER is only 128 bit aligned when
5585 * frame_pointer_needed is true. We don't know that until we're
5586 * expanding the prologue. */
5588 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM
) = 8;
5591 static enum machine_mode
5592 spu_libgcc_cmp_return_mode (void)
5595 /* For SPU word mode is TI mode so it is better to use SImode
5596 for compare returns. */
5600 static enum machine_mode
5601 spu_libgcc_shift_count_mode (void)
5603 /* For SPU word mode is TI mode so it is better to use SImode
5604 for shift counts. */