1 /* Copyright (C) 2006 Free Software Foundation, Inc.
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 2 of the License, or (at your option)
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13 You should have received a copy of the GNU General Public License
14 along with this file; see the file COPYING. If not, write to the Free
15 Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
20 #include "coretypes.h"
24 #include "hard-reg-set.h"
26 #include "insn-config.h"
27 #include "conditions.h"
28 #include "insn-attr.h"
38 #include "basic-block.h"
39 #include "integrate.h"
45 #include "target-def.h"
46 #include "langhooks.h"
48 #include "cfglayout.h"
49 #include "sched-int.h"
54 #include "tree-gimple.h"
55 #include "tm-constrs.h"
56 #include "spu-builtins.h"
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
/* Valid {low, high} immediate ranges for SPU builtin operands, indexed
   by the SPU_BTI_* enumerators named in the trailing comments (from
   spu-builtins.h).  The 'll' suffixes keep the bounds 64-bit so they
   compare cleanly against HOST_WIDE_INT operand values.
   NOTE(review): the struct's field declarations (original lines 60-63)
   are missing from this extraction -- presumably two signed 64-bit
   members {low, high}; confirm against the full source.  */
64 static struct spu_builtin_range spu_builtin_range
[] = {
65 {-0x40ll
, 0x7fll
}, /* SPU_BTI_7 */
66 {-0x40ll
, 0x3fll
}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll
}, /* SPU_BTI_U7 */
68 {-0x200ll
, 0x1ffll
}, /* SPU_BTI_S10 */
69 {-0x2000ll
, 0x1fffll
}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll
}, /* SPU_BTI_U14 */
71 {-0x8000ll
, 0xffffll
}, /* SPU_BTI_16 */
72 {-0x8000ll
, 0x7fffll
}, /* SPU_BTI_S16 */
73 {-0x20000ll
, 0x1ffffll
}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll
}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll
}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll
}, /* SPU_BTI_U18 */
80 /* Target specific attribute specifications. */
/* One flag byte per hard register (FIRST_PSEUDO_REGISTER entries).
   NOTE(review): no reader or writer of this array is visible in this
   chunk -- presumably maintained during register allocation; confirm
   its use against the full file.  */
81 char regs_ever_allocated
[FIRST_PSEUDO_REGISTER
];
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode
);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode
);
87 static rtx
adjust_operand (rtx op
, HOST_WIDE_INT
* start
);
88 static rtx
get_pic_reg (void);
89 static int need_to_save_reg (int regno
, int saving
);
90 static rtx
frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
);
91 static rtx
frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
);
92 static rtx
frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
,
94 static void emit_nop_for_insn (rtx insn
);
95 static bool insn_clobbers_hbr (rtx insn
);
96 static void spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
,
98 static rtx
get_branch_target (rtx branch
);
99 static void insert_branch_hints (void);
100 static void insert_nops (void);
101 static void spu_machine_dependent_reorg (void);
102 static int spu_sched_issue_rate (void);
103 static int spu_sched_variable_issue (FILE * dump
, int verbose
, rtx insn
,
105 static int get_pipe (rtx insn
);
106 static int spu_sched_adjust_priority (rtx insn
, int pri
);
107 static int spu_sched_adjust_cost (rtx insn
, rtx link
, rtx dep_insn
, int cost
);
108 static tree
spu_handle_fndecl_attribute (tree
* node
, tree name
, tree args
,
110 unsigned char *no_add_attrs
);
111 static tree
spu_handle_vector_attribute (tree
* node
, tree name
, tree args
,
113 unsigned char *no_add_attrs
);
114 static int spu_naked_function_p (tree func
);
115 static unsigned char spu_pass_by_reference (int *cum
, enum machine_mode mode
,
116 tree type
, unsigned char named
);
117 static tree
spu_build_builtin_va_list (void);
118 static tree
spu_gimplify_va_arg_expr (tree valist
, tree type
, tree
* pre_p
,
120 static int regno_aligned_for_load (int regno
);
121 static int store_with_one_insn_p (rtx mem
);
122 static int reg_align (rtx reg
);
123 static int mem_is_padded_component_ref (rtx x
);
124 static bool spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
);
125 static void spu_asm_globalize_label (FILE * file
, const char *name
);
126 static unsigned char spu_rtx_costs (rtx x
, int code
, int outer_code
,
128 static unsigned char spu_function_ok_for_sibcall (tree decl
, tree exp
);
129 static void spu_init_libfuncs (void);
130 static bool spu_return_in_memory (tree type
, tree fntype
);
132 extern const char *reg_names
[];
133 rtx spu_compare_op0
, spu_compare_op1
;
147 static enum spu_immediate
which_immediate_load (HOST_WIDE_INT val
);
148 static enum spu_immediate
which_logical_immediate (HOST_WIDE_INT val
);
150 /* Built in types. */
151 tree spu_builtin_types
[SPU_BTI_MAX
];
153 /* TARGET overrides. */
155 #undef TARGET_INIT_BUILTINS
156 #define TARGET_INIT_BUILTINS spu_init_builtins
158 #undef TARGET_EXPAND_BUILTIN
159 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
161 #undef TARGET_EH_RETURN_FILTER_MODE
162 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
164 /* The .8byte directive doesn't seem to work well for a 32 bit
166 #undef TARGET_ASM_UNALIGNED_DI_OP
167 #define TARGET_ASM_UNALIGNED_DI_OP NULL
169 #undef TARGET_RTX_COSTS
170 #define TARGET_RTX_COSTS spu_rtx_costs
172 #undef TARGET_ADDRESS_COST
173 #define TARGET_ADDRESS_COST hook_int_rtx_0
175 #undef TARGET_SCHED_ISSUE_RATE
176 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
178 #undef TARGET_SCHED_VARIABLE_ISSUE
179 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
181 #undef TARGET_SCHED_ADJUST_PRIORITY
182 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
184 #undef TARGET_SCHED_ADJUST_COST
185 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
187 const struct attribute_spec spu_attribute_table
[];
188 #undef TARGET_ATTRIBUTE_TABLE
189 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
191 #undef TARGET_ASM_INTEGER
192 #define TARGET_ASM_INTEGER spu_assemble_integer
194 #undef TARGET_SCALAR_MODE_SUPPORTED_P
195 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
197 #undef TARGET_VECTOR_MODE_SUPPORTED_P
198 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
200 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
201 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
203 #undef TARGET_ASM_GLOBALIZE_LABEL
204 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
206 #undef TARGET_PASS_BY_REFERENCE
207 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
209 #undef TARGET_MUST_PASS_IN_STACK
210 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
212 #undef TARGET_BUILD_BUILTIN_VA_LIST
213 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
215 #undef TARGET_SETUP_INCOMING_VARARGS
216 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
218 #undef TARGET_MACHINE_DEPENDENT_REORG
219 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
221 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
222 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
224 #undef TARGET_DEFAULT_TARGET_FLAGS
225 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
227 #undef TARGET_INIT_LIBFUNCS
228 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
230 #undef TARGET_RETURN_IN_MEMORY
231 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
233 struct gcc_target targetm
= TARGET_INITIALIZER
;
235 /* Sometimes certain combinations of command options do not make sense
236 on a particular target machine. You can define a macro
237 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
238 executed once just after all the command options have been parsed. */
240 spu_override_options (void)
243 /* Override some of the default param values. With so many registers
244 larger values are better for these params. */
245 if (MAX_UNROLLED_INSNS
== 100)
246 MAX_UNROLLED_INSNS
= 250;
247 if (MAX_PENDING_LIST_LENGTH
== 32)
248 MAX_PENDING_LIST_LENGTH
= 128;
250 flag_omit_frame_pointer
= 1;
252 if (align_functions
< 8)
256 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
257 struct attribute_spec.handler. */
259 /* Table of machine attributes. */
/* Machine attribute table, installed via TARGET_ATTRIBUTE_TABLE.
   "naked" must appear on a declaration (decl_req == true) and is
   checked by spu_handle_fndecl_attribute; "spu_vector" must appear on
   a type (type_req == true) and is checked by
   spu_handle_vector_attribute.  The all-NULL/false entry is the
   sentinel that terminates the table.  */
260 const struct attribute_spec spu_attribute_table
[] =
262 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
263 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute
},
264 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute
},
265 { NULL
, 0, 0, false, false, false, NULL
}
268 /* True if MODE is valid for the target. By "valid", we mean able to
269 be manipulated in non-trivial ways. In particular, this means all
270 the arithmetic is supported. */
272 spu_scalar_mode_supported_p (enum machine_mode mode
)
290 /* Similarly for vector modes. "Supported" here is less strict. At
291 least some operations are supported; need to check optabs or builtins
292 for further details. */
294 spu_vector_mode_supported_p (enum machine_mode mode
)
311 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
312 least significant bytes of the outer mode. This function returns
313 TRUE for the SUBREG's where this is correct. */
315 valid_subreg (rtx op
)
/* Both the outer mode (of OP) and the inner mode (of SUBREG_REG (OP))
   must be known.  */
317 enum machine_mode om
= GET_MODE (op
);
318 enum machine_mode im
= GET_MODE (SUBREG_REG (op
));
/* Accept when the inner and outer modes are the same size, or when
   both fit in a single 32-bit slot (<= 4 bytes each).
   NOTE(review): the function's return-type line and braces (original
   lines 314/316/322) are missing from this extraction.  */
319 return om
!= VOIDmode
&& im
!= VOIDmode
320 && (GET_MODE_SIZE (im
) == GET_MODE_SIZE (om
)
321 || (GET_MODE_SIZE (im
) <= 4 && GET_MODE_SIZE (om
) <= 4));
324 /* When insv and ext[sz]v ar passed a TI SUBREG, we want to strip it off
325 and adjust the start offset. */
327 adjust_operand (rtx op
, HOST_WIDE_INT
* start
)
329 enum machine_mode mode
;
331 /* Strip any SUBREG */
332 if (GET_CODE (op
) == SUBREG
)
336 GET_MODE_BITSIZE (GET_MODE (op
)) -
337 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op
)));
338 op
= SUBREG_REG (op
);
340 /* If it is smaller than SI, assure a SUBREG */
341 op_size
= GET_MODE_BITSIZE (GET_MODE (op
));
345 *start
+= 32 - op_size
;
348 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
349 mode
= mode_for_size (op_size
, MODE_INT
, 0);
350 if (mode
!= GET_MODE (op
))
351 op
= gen_rtx_SUBREG (mode
, op
, 0);
356 spu_expand_extv (rtx ops
[], int unsignedp
)
358 HOST_WIDE_INT width
= INTVAL (ops
[2]);
359 HOST_WIDE_INT start
= INTVAL (ops
[3]);
360 HOST_WIDE_INT src_size
, dst_size
;
361 enum machine_mode src_mode
, dst_mode
;
362 rtx dst
= ops
[0], src
= ops
[1];
365 dst
= adjust_operand (ops
[0], 0);
366 dst_mode
= GET_MODE (dst
);
367 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
369 src
= adjust_operand (src
, &start
);
370 src_mode
= GET_MODE (src
);
371 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
375 s
= gen_reg_rtx (src_mode
);
379 emit_insn (gen_ashlsi3 (s
, src
, GEN_INT (start
)));
382 emit_insn (gen_ashldi3 (s
, src
, GEN_INT (start
)));
385 emit_insn (gen_ashlti3 (s
, src
, GEN_INT (start
)));
393 if (width
< src_size
)
400 icode
= unsignedp
? CODE_FOR_lshrsi3
: CODE_FOR_ashrsi3
;
403 icode
= unsignedp
? CODE_FOR_lshrdi3
: CODE_FOR_ashrdi3
;
406 icode
= unsignedp
? CODE_FOR_lshrti3
: CODE_FOR_ashrti3
;
411 s
= gen_reg_rtx (src_mode
);
412 pat
= GEN_FCN (icode
) (s
, src
, GEN_INT (src_size
- width
));
417 convert_move (dst
, src
, unsignedp
);
421 spu_expand_insv (rtx ops
[])
423 HOST_WIDE_INT width
= INTVAL (ops
[1]);
424 HOST_WIDE_INT start
= INTVAL (ops
[2]);
425 HOST_WIDE_INT maskbits
;
426 enum machine_mode dst_mode
, src_mode
;
427 rtx dst
= ops
[0], src
= ops
[3];
428 int dst_size
, src_size
;
434 if (GET_CODE (ops
[0]) == MEM
)
435 dst
= gen_reg_rtx (TImode
);
437 dst
= adjust_operand (dst
, &start
);
438 dst_mode
= GET_MODE (dst
);
439 dst_size
= GET_MODE_BITSIZE (GET_MODE (dst
));
441 if (CONSTANT_P (src
))
443 enum machine_mode m
=
444 (width
<= 32 ? SImode
: width
<= 64 ? DImode
: TImode
);
445 src
= force_reg (m
, convert_to_mode (m
, src
, 0));
447 src
= adjust_operand (src
, 0);
448 src_mode
= GET_MODE (src
);
449 src_size
= GET_MODE_BITSIZE (GET_MODE (src
));
451 mask
= gen_reg_rtx (dst_mode
);
452 shift_reg
= gen_reg_rtx (dst_mode
);
453 shift
= dst_size
- start
- width
;
455 /* It's not safe to use subreg here because the compiler assumes
456 that the SUBREG_REG is right justified in the SUBREG. */
457 convert_move (shift_reg
, src
, 1);
464 emit_insn (gen_ashlsi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
467 emit_insn (gen_ashldi3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
470 emit_insn (gen_ashlti3 (shift_reg
, shift_reg
, GEN_INT (shift
)));
482 maskbits
= (-1ll << (32 - width
- start
));
484 maskbits
+= (1ll << (32 - start
));
485 emit_move_insn (mask
, GEN_INT (maskbits
));
488 maskbits
= (-1ll << (64 - width
- start
));
490 maskbits
+= (1ll << (64 - start
));
491 emit_move_insn (mask
, GEN_INT (maskbits
));
495 unsigned char arr
[16];
497 memset (arr
, 0, sizeof (arr
));
498 arr
[i
] = 0xff >> (start
& 7);
499 for (i
++; i
<= (start
+ width
- 1) / 8; i
++)
501 arr
[i
- 1] &= 0xff << (7 - ((start
+ width
- 1) & 7));
502 emit_move_insn (mask
, array_to_constant (TImode
, arr
));
508 if (GET_CODE (ops
[0]) == MEM
)
510 rtx aligned
= gen_reg_rtx (SImode
);
511 rtx low
= gen_reg_rtx (SImode
);
512 rtx addr
= gen_reg_rtx (SImode
);
513 rtx rotl
= gen_reg_rtx (SImode
);
514 rtx mask0
= gen_reg_rtx (TImode
);
517 emit_move_insn (addr
, XEXP (ops
[0], 0));
518 emit_insn (gen_andsi3 (aligned
, addr
, GEN_INT (-16)));
519 emit_insn (gen_andsi3 (low
, addr
, GEN_INT (15)));
520 emit_insn (gen_negsi2 (rotl
, low
));
521 emit_insn (gen_rotqby_ti (shift_reg
, shift_reg
, rotl
));
522 emit_insn (gen_rotqmby_ti (mask0
, mask
, rotl
));
523 mem
= change_address (ops
[0], TImode
, aligned
);
524 set_mem_alias_set (mem
, 0);
525 emit_move_insn (dst
, mem
);
526 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask0
));
527 emit_move_insn (mem
, dst
);
528 if (start
+ width
> MEM_ALIGN (ops
[0]))
530 rtx shl
= gen_reg_rtx (SImode
);
531 rtx mask1
= gen_reg_rtx (TImode
);
532 rtx dst1
= gen_reg_rtx (TImode
);
534 emit_insn (gen_subsi3 (shl
, GEN_INT (16), low
));
535 emit_insn (gen_shlqby_ti (mask1
, mask
, shl
));
536 mem1
= adjust_address (mem
, TImode
, 16);
537 set_mem_alias_set (mem1
, 0);
538 emit_move_insn (dst1
, mem1
);
539 emit_insn (gen_selb (dst1
, dst1
, shift_reg
, mask1
));
540 emit_move_insn (mem1
, dst1
);
544 emit_insn (gen_selb (dst
, dst
, shift_reg
, mask
));
549 spu_expand_block_move (rtx ops
[])
551 HOST_WIDE_INT bytes
, align
, offset
;
552 rtx src
, dst
, sreg
, dreg
, target
;
554 if (GET_CODE (ops
[2]) != CONST_INT
555 || GET_CODE (ops
[3]) != CONST_INT
556 || INTVAL (ops
[2]) > (HOST_WIDE_INT
) (MOVE_RATIO
* 8))
559 bytes
= INTVAL (ops
[2]);
560 align
= INTVAL (ops
[3]);
570 for (offset
= 0; offset
+ 16 <= bytes
; offset
+= 16)
572 dst
= adjust_address (ops
[0], V16QImode
, offset
);
573 src
= adjust_address (ops
[1], V16QImode
, offset
);
574 emit_move_insn (dst
, src
);
579 unsigned char arr
[16] = { 0 };
580 for (i
= 0; i
< bytes
- offset
; i
++)
582 dst
= adjust_address (ops
[0], V16QImode
, offset
);
583 src
= adjust_address (ops
[1], V16QImode
, offset
);
584 mask
= gen_reg_rtx (V16QImode
);
585 sreg
= gen_reg_rtx (V16QImode
);
586 dreg
= gen_reg_rtx (V16QImode
);
587 target
= gen_reg_rtx (V16QImode
);
588 emit_move_insn (mask
, array_to_constant (V16QImode
, arr
));
589 emit_move_insn (dreg
, dst
);
590 emit_move_insn (sreg
, src
);
591 emit_insn (gen_selb (target
, dreg
, sreg
, mask
));
592 emit_move_insn (dst
, target
);
600 { SPU_EQ
, SPU_GT
, SPU_GTU
};
603 int spu_comp_icode
[8][3] = {
604 {CODE_FOR_ceq_qi
, CODE_FOR_cgt_qi
, CODE_FOR_clgt_qi
},
605 {CODE_FOR_ceq_hi
, CODE_FOR_cgt_hi
, CODE_FOR_clgt_hi
},
606 {CODE_FOR_ceq_si
, CODE_FOR_cgt_si
, CODE_FOR_clgt_si
},
607 {CODE_FOR_ceq_di
, CODE_FOR_cgt_di
, CODE_FOR_clgt_di
},
608 {CODE_FOR_ceq_ti
, CODE_FOR_cgt_ti
, CODE_FOR_clgt_ti
},
609 {CODE_FOR_ceq_sf
, CODE_FOR_cgt_sf
, 0},
611 {CODE_FOR_ceq_vec
, 0, 0},
614 /* Generate a compare for CODE. Return a brand-new rtx that represents
615 the result of the compare. GCC can figure this out too if we don't
616 provide all variations of compares, but GCC always wants to use
617 WORD_MODE, we can generate better code in most cases if we do it
620 spu_emit_branch_or_set (int is_set
, enum rtx_code code
, rtx operands
[])
622 int reverse_compare
= 0;
623 int reverse_test
= 0;
626 rtx target
= operands
[0];
627 enum machine_mode comp_mode
;
628 enum machine_mode op_mode
;
629 enum spu_comp_code scode
;
632 /* When spu_compare_op1 is a CONST_INT change (X >= C) to (X > C-1),
633 and so on, to keep the constant in operand 1. */
634 if (GET_CODE (spu_compare_op1
) == CONST_INT
)
636 HOST_WIDE_INT val
= INTVAL (spu_compare_op1
) - 1;
637 if (trunc_int_for_mode (val
, GET_MODE (spu_compare_op0
)) == val
)
641 spu_compare_op1
= GEN_INT (val
);
645 spu_compare_op1
= GEN_INT (val
);
649 spu_compare_op1
= GEN_INT (val
);
653 spu_compare_op1
= GEN_INT (val
);
714 op_mode
= GET_MODE (spu_compare_op0
);
753 if (GET_MODE (spu_compare_op1
) == DFmode
)
755 rtx reg
= gen_reg_rtx (DFmode
);
756 if (!flag_unsafe_math_optimizations
757 || (scode
!= SPU_GT
&& scode
!= SPU_EQ
))
760 emit_insn (gen_subdf3 (reg
, spu_compare_op1
, spu_compare_op0
));
762 emit_insn (gen_subdf3 (reg
, spu_compare_op0
, spu_compare_op1
));
764 spu_compare_op0
= reg
;
765 spu_compare_op1
= CONST0_RTX (DFmode
);
768 if (is_set
== 0 && spu_compare_op1
== const0_rtx
769 && (GET_MODE (spu_compare_op0
) == SImode
770 || GET_MODE (spu_compare_op0
) == HImode
) && scode
== SPU_EQ
)
772 /* Don't need to set a register with the result when we are
773 comparing against zero and branching. */
774 reverse_test
= !reverse_test
;
775 compare_result
= spu_compare_op0
;
779 compare_result
= gen_reg_rtx (comp_mode
);
783 rtx t
= spu_compare_op1
;
784 spu_compare_op1
= spu_compare_op0
;
788 if (spu_comp_icode
[index
][scode
] == 0)
791 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[1].predicate
)
792 (spu_compare_op0
, op_mode
))
793 spu_compare_op0
= force_reg (op_mode
, spu_compare_op0
);
794 if (!(*insn_data
[spu_comp_icode
[index
][scode
]].operand
[2].predicate
)
795 (spu_compare_op1
, op_mode
))
796 spu_compare_op1
= force_reg (op_mode
, spu_compare_op1
);
797 comp_rtx
= GEN_FCN (spu_comp_icode
[index
][scode
]) (compare_result
,
802 emit_insn (comp_rtx
);
811 /* We don't have branch on QI compare insns, so we convert the
812 QI compare result to a HI result. */
813 if (comp_mode
== QImode
)
815 rtx old_res
= compare_result
;
816 compare_result
= gen_reg_rtx (HImode
);
818 emit_insn (gen_extendqihi2 (compare_result
, old_res
));
822 bcomp
= gen_rtx_EQ (comp_mode
, compare_result
, const0_rtx
);
824 bcomp
= gen_rtx_NE (comp_mode
, compare_result
, const0_rtx
);
826 loc_ref
= gen_rtx_LABEL_REF (VOIDmode
, target
);
827 emit_jump_insn (gen_rtx_SET (VOIDmode
, pc_rtx
,
828 gen_rtx_IF_THEN_ELSE (VOIDmode
, bcomp
,
831 else if (is_set
== 2)
833 int compare_size
= GET_MODE_BITSIZE (comp_mode
);
834 int target_size
= GET_MODE_BITSIZE (GET_MODE (target
));
835 enum machine_mode mode
= mode_for_size (target_size
, MODE_INT
, 0);
837 rtx op_t
= operands
[2];
838 rtx op_f
= operands
[3];
840 /* The result of the comparison can be SI, HI or QI mode. Create a
841 mask based on that result. */
842 if (target_size
> compare_size
)
844 select_mask
= gen_reg_rtx (mode
);
845 emit_insn (gen_extend_compare (select_mask
, compare_result
));
847 else if (target_size
< compare_size
)
849 gen_rtx_SUBREG (mode
, compare_result
,
850 (compare_size
- target_size
) / BITS_PER_UNIT
);
851 else if (comp_mode
!= mode
)
852 select_mask
= gen_rtx_SUBREG (mode
, compare_result
, 0);
854 select_mask
= compare_result
;
856 if (GET_MODE (target
) != GET_MODE (op_t
)
857 || GET_MODE (target
) != GET_MODE (op_f
))
861 emit_insn (gen_selb (target
, op_t
, op_f
, select_mask
));
863 emit_insn (gen_selb (target
, op_f
, op_t
, select_mask
));
868 emit_insn (gen_rtx_SET (VOIDmode
, compare_result
,
869 gen_rtx_NOT (comp_mode
, compare_result
)));
870 if (GET_MODE (target
) == SImode
&& GET_MODE (compare_result
) == HImode
)
871 emit_insn (gen_extendhisi2 (target
, compare_result
));
872 else if (GET_MODE (target
) == SImode
873 && GET_MODE (compare_result
) == QImode
)
874 emit_insn (gen_extend_compare (target
, compare_result
));
876 emit_move_insn (target
, compare_result
);
/* Return the target bit pattern of CONST_DOUBLE X as a HOST_WIDE_INT:
   REAL_VALUE_TO_TARGET_SINGLE fills val directly for SFmode, while for
   DFmode the two target words from l[] are packed, with the low word
   masked in from l[1].
   NOTE(review): the declarations of rv/val/l, the braces, and the line
   loading the DFmode high word (presumably val = l[0]) are missing
   from this extraction -- confirm against the full source before
   editing.  */
881 const_double_to_hwint (rtx x
)
885 if (GET_MODE (x
) == SFmode
)
887 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
888 REAL_VALUE_TO_TARGET_SINGLE (rv
, val
);
890 else if (GET_MODE (x
) == DFmode
)
893 REAL_VALUE_FROM_CONST_DOUBLE (rv
, x
);
894 REAL_VALUE_TO_TARGET_DOUBLE (rv
, l
);
896 val
= (val
<< 32) | (l
[1] & 0xffffffff);
/* Inverse of const_double_to_hwint: build a CONST_DOUBLE of MODE
   (asserted to be SFmode or DFmode) whose bit pattern is V.  The
   (v << 32) >> 32 idiom extracts the low 32 bits (sign-extended) into
   the target word array tv[] before real_from_target () reassembles
   the real value.
   NOTE(review): several lines (tv/rv declarations, the SFmode branch
   header, the DFmode tv[0] assignment) are absent from this
   extraction -- confirm against the full source.  */
904 hwint_to_const_double (enum machine_mode mode
, HOST_WIDE_INT v
)
908 gcc_assert (mode
== SFmode
|| mode
== DFmode
);
911 tv
[0] = (v
<< 32) >> 32;
912 else if (mode
== DFmode
)
914 tv
[1] = (v
<< 32) >> 32;
917 real_from_target (&rv
, tv
, mode
);
918 return CONST_DOUBLE_FROM_REAL_VALUE (rv
, mode
);
922 print_operand_address (FILE * file
, register rtx addr
)
927 if (GET_CODE (addr
) == AND
928 && GET_CODE (XEXP (addr
, 1)) == CONST_INT
929 && INTVAL (XEXP (addr
, 1)) == -16)
930 addr
= XEXP (addr
, 0);
932 switch (GET_CODE (addr
))
935 fprintf (file
, "0(%s)", reg_names
[REGNO (addr
)]);
939 reg
= XEXP (addr
, 0);
940 offset
= XEXP (addr
, 1);
941 if (GET_CODE (offset
) == REG
)
943 fprintf (file
, "%s,%s", reg_names
[REGNO (reg
)],
944 reg_names
[REGNO (offset
)]);
946 else if (GET_CODE (offset
) == CONST_INT
)
948 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
"(%s)",
949 INTVAL (offset
), reg_names
[REGNO (reg
)]);
959 output_addr_const (file
, addr
);
969 print_operand (FILE * file
, rtx x
, int code
)
971 enum machine_mode mode
= GET_MODE (x
);
973 unsigned char arr
[16];
974 int xcode
= GET_CODE (x
);
975 if (GET_MODE (x
) == VOIDmode
)
978 case 'H': /* 128 bits, signed */
979 case 'L': /* 128 bits, signed */
980 case 'm': /* 128 bits, signed */
981 case 'T': /* 128 bits, signed */
982 case 't': /* 128 bits, signed */
985 case 'G': /* 64 bits, signed */
986 case 'K': /* 64 bits, signed */
987 case 'k': /* 64 bits, signed */
988 case 'D': /* 64 bits, signed */
989 case 'd': /* 64 bits, signed */
992 case 'F': /* 32 bits, signed */
993 case 'J': /* 32 bits, signed */
994 case 'j': /* 32 bits, signed */
995 case 's': /* 32 bits, signed */
996 case 'S': /* 32 bits, signed */
1003 case 'j': /* 32 bits, signed */
1004 case 'k': /* 64 bits, signed */
1005 case 'm': /* 128 bits, signed */
1006 if (xcode
== CONST_INT
1007 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1009 gcc_assert (logical_immediate_p (x
, mode
));
1010 constant_to_array (mode
, x
, arr
);
1011 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1012 val
= trunc_int_for_mode (val
, SImode
);
1013 switch (which_logical_immediate (val
))
1018 fprintf (file
, "h");
1021 fprintf (file
, "b");
1031 case 'J': /* 32 bits, signed */
1032 case 'K': /* 64 bits, signed */
1033 case 'L': /* 128 bits, signed */
1034 if (xcode
== CONST_INT
1035 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1037 gcc_assert (logical_immediate_p (x
, mode
)
1038 || iohl_immediate_p (x
, mode
));
1039 constant_to_array (mode
, x
, arr
);
1040 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1041 val
= trunc_int_for_mode (val
, SImode
);
1042 switch (which_logical_immediate (val
))
1048 val
= trunc_int_for_mode (val
, HImode
);
1051 val
= trunc_int_for_mode (val
, QImode
);
1056 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1062 case 't': /* 128 bits, signed */
1063 case 'd': /* 64 bits, signed */
1064 case 's': /* 32 bits, signed */
1065 if (xcode
== CONST_INT
1066 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1068 gcc_assert (immediate_load_p (x
, mode
));
1069 constant_to_array (mode
, x
, arr
);
1070 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1071 val
= trunc_int_for_mode (val
, SImode
);
1072 switch (which_immediate_load (val
))
1077 fprintf (file
, "a");
1080 fprintf (file
, "h");
1083 fprintf (file
, "hu");
1089 else if (xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
|| xcode
== CONST
)
1090 fprintf (file
, "a");
1091 else if (xcode
== HIGH
)
1092 fprintf (file
, "hu");
1097 case 'T': /* 128 bits, signed */
1098 case 'D': /* 64 bits, signed */
1099 case 'S': /* 32 bits, signed */
1100 if (xcode
== CONST_INT
1101 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1103 gcc_assert (immediate_load_p (x
, mode
));
1104 constant_to_array (mode
, x
, arr
);
1105 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
1106 val
= trunc_int_for_mode (val
, SImode
);
1107 switch (which_immediate_load (val
))
1114 val
= trunc_int_for_mode (((arr
[0] << 8) | arr
[1]), HImode
);
1119 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, val
);
1121 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1122 output_addr_const (file
, x
);
1123 else if (xcode
== HIGH
)
1125 output_addr_const (file
, XEXP (x
, 0));
1126 fprintf (file
, "@h");
1135 if (xcode
== CONST_INT
1136 || xcode
== CONST_DOUBLE
|| xcode
== CONST_VECTOR
)
1137 { /* immediate operand for fsmbi */
1139 HOST_WIDE_INT val
= 0;
1140 unsigned char arr
[16];
1141 constant_to_array (mode
, x
, arr
);
1142 for (i
= 0; i
< 16; i
++)
1147 print_operand (file
, GEN_INT (val
), 0);
1154 if (xcode
== CONST_INT
)
1156 /* Only 4 least significant bits are relevant for generate
1157 control word instructions. */
1158 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, INTVAL (x
) & 15);
1163 case 'M': /* print code for c*d */
1164 if (GET_CODE (x
) == CONST_INT
)
1168 fprintf (file
, "b");
1171 fprintf (file
, "h");
1174 fprintf (file
, "w");
1177 fprintf (file
, "d");
1186 case 'N': /* Negate the operand */
1187 if (xcode
== CONST_INT
)
1188 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
, -INTVAL (x
));
1189 else if (xcode
== CONST_VECTOR
)
1190 fprintf (file
, HOST_WIDE_INT_PRINT_DEC
,
1191 -INTVAL (CONST_VECTOR_ELT (x
, 0)));
1194 case 'I': /* enable/disable interrupts */
1195 if (xcode
== CONST_INT
)
1196 fprintf (file
, "%s", INTVAL (x
) == 0 ? "d" : "e");
1199 case 'b': /* branch modifiers */
1201 fprintf (file
, "%s", GET_MODE (x
) == HImode
? "h" : "");
1202 else if (COMPARISON_P (x
))
1203 fprintf (file
, "%s", xcode
== NE
? "n" : "");
1206 case 'i': /* indirect call */
1209 if (GET_CODE (XEXP (x
, 0)) == REG
)
1210 /* Used in indirect function calls. */
1211 fprintf (file
, "%s", reg_names
[REGNO (XEXP (x
, 0))]);
1213 output_address (XEXP (x
, 0));
1217 case 'p': /* load/store */
1221 xcode
= GET_CODE (x
);
1226 xcode
= GET_CODE (x
);
1229 fprintf (file
, "d");
1230 else if (xcode
== CONST_INT
)
1231 fprintf (file
, "a");
1232 else if (xcode
== CONST
|| xcode
== SYMBOL_REF
|| xcode
== LABEL_REF
)
1233 fprintf (file
, "r");
1234 else if (xcode
== PLUS
|| xcode
== LO_SUM
)
1236 if (GET_CODE (XEXP (x
, 1)) == REG
)
1237 fprintf (file
, "x");
1239 fprintf (file
, "d");
1245 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1246 else if (xcode
== MEM
)
1247 output_address (XEXP (x
, 0));
1248 else if (xcode
== CONST_VECTOR
)
1249 output_addr_const (file
, CONST_VECTOR_ELT (x
, 0));
1251 output_addr_const (file
, x
);
1255 output_operand_lossage ("invalid %%xn code");
1260 extern char call_used_regs
[];
1261 extern char regs_ever_live
[];
1263 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1264 caller saved register. For leaf functions it is more efficient to
1265 use a volatile register because we won't need to save and restore the
1266 pic register. This routine is only valid after register allocation
1267 is completed, so we can pick an unused register. */
1271 rtx pic_reg
= pic_offset_table_rtx
;
1272 if (!reload_completed
&& !reload_in_progress
)
1277 /* Split constant addresses to handle cases that are too large. Also, add in
1278 the pic register when in PIC mode. */
1280 spu_split_address (rtx
* ops
)
1282 if (TARGET_LARGE_MEM
1283 || (GET_CODE (ops
[1]) == CONST
&& !legitimate_const (ops
[1], 0)))
1285 emit_insn (gen_high (ops
[0], ops
[1]));
1286 emit_insn (gen_low (ops
[0], ops
[0], ops
[1]));
1289 emit_insn (gen_pic (ops
[0], ops
[1]));
1292 rtx pic_reg
= get_pic_reg ();
1293 emit_insn (gen_addsi3 (ops
[0], ops
[0], pic_reg
));
1294 current_function_uses_pic_offset_table
= 1;
1298 /* SAVING is TRUE when we are generating the actual load and store
1299 instructions for REGNO. When determining the size of the stack
1300 needed for saving register we must allocate enough space for the
1301 worst case, because we don't always have the information early enough
1302 to not allocate it. But we can at least eliminate the actual loads
1303 and stores during the prologue/epilogue. */
/* Returns nonzero when REGNO must be saved: live call-saved registers,
   plus (per the fragmentary condition below) the PIC register when the
   PIC offset table is actually used or the function is not a leaf.
   NOTE(review): original lines 1306, 1308-1309 and 1312 are missing
   here, so the exact boolean structure cannot be confirmed from this
   extraction.  */
1305 need_to_save_reg (int regno
, int saving
)
1307 if (regs_ever_live
[regno
] && !call_used_regs
[regno
])
1310 && regno
== PIC_OFFSET_TABLE_REGNUM
1311 && (!saving
|| current_function_uses_pic_offset_table
)
1313 || !current_function_is_leaf
|| regs_ever_live
[LAST_ARG_REGNUM
]))
/* Size in bytes of the register-save area: one 16-byte (0x10) slot
   for every hard register that need_to_save_reg (regno, 0) reports,
   scanning from FIRST_PSEUDO_REGISTER - 1 down to 0.  (The original
   header comment and the regno declaration are truncated in this
   extraction.)  */
1318 /* This function is only correct starting with local register
1321 spu_saved_regs_size (void)
1323 int reg_save_size
= 0;
1326 for (regno
= FIRST_PSEUDO_REGISTER
- 1; regno
>= 0; --regno
)
1327 if (need_to_save_reg (regno
, 0))
1328 reg_save_size
+= 0x10;
1329 return reg_save_size
;
/* Emit a store of hard register REGNO -- viewed as V4SImode, i.e. a
   full 16-byte quadword -- to the frame slot at ADDR + OFFSET, and
   return the emitted insn.
   NOTE(review): the declaration line for 'mem' (original line 1336)
   and the braces are missing from this extraction.  */
1333 frame_emit_store (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1335 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1337 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1338 return emit_insn (gen_movv4si (mem
, reg
));
/* Mirror of frame_emit_store: emit a load of the 16-byte quadword at
   frame slot ADDR + OFFSET into hard register REGNO (as V4SImode),
   and return the emitted insn.
   NOTE(review): the declaration line for 'mem' (original line 1345)
   and the braces are missing from this extraction.  */
1342 frame_emit_load (int regno
, rtx addr
, HOST_WIDE_INT offset
)
1344 rtx reg
= gen_rtx_REG (V4SImode
, regno
);
1346 gen_frame_mem (V4SImode
, gen_rtx_PLUS (Pmode
, addr
, GEN_INT (offset
)));
1347 return emit_insn (gen_movv4si (reg
, mem
));
1350 /* This happens after reload, so we need to expand it. */
/* Add the immediate IMM to SRC into DST.  When IMM satisfies
   constraint K a single addsi3 suffices; otherwise IMM is first
   loaded into SCRATCH (tagged REG_MAYBE_DEAD so the move can be
   eliminated if unused) and added via a register add.  The REGNO
   comparisons guard against SRC or DST aliasing SCRATCH.
   NOTE(review): the trailing arguments of both gen_rtx_EXPR_LIST
   calls, the insn declaration, returns, and braces are missing from
   this extraction -- confirm against the full source.  */
1352 frame_emit_add_imm (rtx dst
, rtx src
, HOST_WIDE_INT imm
, rtx scratch
)
1355 if (satisfies_constraint_K (GEN_INT (imm
)))
1357 insn
= emit_insn (gen_addsi3 (dst
, src
, GEN_INT (imm
)));
1361 insn
= emit_insn (gen_movsi (scratch
, gen_int_mode (imm
, SImode
)));
1362 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1364 insn
= emit_insn (gen_addsi3 (dst
, src
, scratch
));
1365 if (REGNO (src
) == REGNO (scratch
))
1368 if (REGNO (dst
) == REGNO (scratch
))
1369 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1374 /* Return nonzero if this function is known to have a null epilogue. */
/* True only after reload, for a leaf function with no static chain
   and a zero-size frame (saved registers + outgoing args + pretend
   args all sum to zero).
   NOTE(review): one frame-size term (original line 1383, presumably
   get_frame_size ()) and the return statements are missing from this
   extraction.  */
1377 direct_return (void)
1379 if (reload_completed
)
1381 if (cfun
->static_chain_decl
== 0
1382 && (spu_saved_regs_size ()
1384 + current_function_outgoing_args_size
1385 + current_function_pretend_args_size
== 0)
1386 && current_function_is_leaf
)
1393 The stack frame looks like this:
1400 prev SP | back chain |
1403 | reg save | current_function_pretend_args_size bytes
1406 | saved regs | spu_saved_regs_size() bytes
1409 FP | vars | get_frame_size() bytes
1413 | args | current_function_outgoing_args_size bytes
1423 spu_expand_prologue (void)
1425 HOST_WIDE_INT size
= get_frame_size (), offset
, regno
;
1426 HOST_WIDE_INT total_size
;
1427 HOST_WIDE_INT saved_regs_size
;
1428 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1429 rtx scratch_reg_0
, scratch_reg_1
;
1432 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1433 the "toplevel" insn chain. */
1434 emit_note (NOTE_INSN_DELETED
);
1436 if (flag_pic
&& optimize
== 0)
1437 current_function_uses_pic_offset_table
= 1;
1439 if (spu_naked_function_p (current_function_decl
))
1442 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1443 scratch_reg_1
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 2);
1445 saved_regs_size
= spu_saved_regs_size ();
1446 total_size
= size
+ saved_regs_size
1447 + current_function_outgoing_args_size
1448 + current_function_pretend_args_size
;
1450 if (!current_function_is_leaf
1451 || current_function_calls_alloca
|| total_size
> 0)
1452 total_size
+= STACK_POINTER_OFFSET
;
1454 /* Save this first because code after this might use the link
1455 register as a scratch register. */
1456 if (!current_function_is_leaf
)
1458 insn
= frame_emit_store (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1459 RTX_FRAME_RELATED_P (insn
) = 1;
1464 offset
= -current_function_pretend_args_size
;
1465 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1466 if (need_to_save_reg (regno
, 1))
1469 insn
= frame_emit_store (regno
, sp_reg
, offset
);
1470 RTX_FRAME_RELATED_P (insn
) = 1;
1474 if (flag_pic
&& current_function_uses_pic_offset_table
)
1476 rtx pic_reg
= get_pic_reg ();
1477 insn
= emit_insn (gen_load_pic_offset (pic_reg
, scratch_reg_0
));
1478 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1480 insn
= emit_insn (gen_subsi3 (pic_reg
, pic_reg
, scratch_reg_0
));
1481 REG_NOTES (insn
) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD
, const0_rtx
,
1487 if (flag_stack_check
)
1489 /* We compare agains total_size-1 because
1490 ($sp >= total_size) <=> ($sp > total_size-1) */
1491 rtx scratch_v4si
= gen_rtx_REG (V4SImode
, REGNO (scratch_reg_0
));
1492 rtx sp_v4si
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
1493 rtx size_v4si
= spu_const (V4SImode
, total_size
- 1);
1494 if (!satisfies_constraint_K (GEN_INT (total_size
- 1)))
1496 emit_move_insn (scratch_v4si
, size_v4si
);
1497 size_v4si
= scratch_v4si
;
1499 emit_insn (gen_cgt_v4si (scratch_v4si
, sp_v4si
, size_v4si
));
1500 emit_insn (gen_vec_extractv4si
1501 (scratch_reg_0
, scratch_v4si
, GEN_INT (1)));
1502 emit_insn (gen_spu_heq (scratch_reg_0
, GEN_INT (0)));
1505 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1506 the value of the previous $sp because we save it as the back
1508 if (total_size
<= 2000)
1510 /* In this case we save the back chain first. */
1511 insn
= frame_emit_store (STACK_POINTER_REGNUM
, sp_reg
, -total_size
);
1512 RTX_FRAME_RELATED_P (insn
) = 1;
1514 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_0
);
1516 else if (satisfies_constraint_K (GEN_INT (-total_size
)))
1518 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1519 RTX_FRAME_RELATED_P (insn
) = 1;
1521 emit_insn (gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
)));
1525 insn
= emit_move_insn (scratch_reg_0
, sp_reg
);
1526 RTX_FRAME_RELATED_P (insn
) = 1;
1528 frame_emit_add_imm (sp_reg
, sp_reg
, -total_size
, scratch_reg_1
);
1530 RTX_FRAME_RELATED_P (insn
) = 1;
1531 real
= gen_addsi3 (sp_reg
, sp_reg
, GEN_INT (-total_size
));
1533 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR
, real
, REG_NOTES (insn
));
1535 if (total_size
> 2000)
1537 /* Save the back chain ptr */
1538 insn
= frame_emit_store (REGNO (scratch_reg_0
), sp_reg
, 0);
1539 RTX_FRAME_RELATED_P (insn
) = 1;
1542 if (frame_pointer_needed
)
1544 rtx fp_reg
= gen_rtx_REG (Pmode
, HARD_FRAME_POINTER_REGNUM
);
1545 HOST_WIDE_INT fp_offset
= STACK_POINTER_OFFSET
1546 + current_function_outgoing_args_size
;
1547 /* Set the new frame_pointer */
1548 frame_emit_add_imm (fp_reg
, sp_reg
, fp_offset
, scratch_reg_0
);
1552 emit_note (NOTE_INSN_DELETED
);
1556 spu_expand_epilogue (bool sibcall_p
)
1558 int size
= get_frame_size (), offset
, regno
;
1559 HOST_WIDE_INT saved_regs_size
, total_size
;
1560 rtx sp_reg
= gen_rtx_REG (Pmode
, STACK_POINTER_REGNUM
);
1561 rtx jump
, scratch_reg_0
;
1563 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1564 the "toplevel" insn chain. */
1565 emit_note (NOTE_INSN_DELETED
);
1567 if (spu_naked_function_p (current_function_decl
))
1570 scratch_reg_0
= gen_rtx_REG (SImode
, LAST_ARG_REGNUM
+ 1);
1572 saved_regs_size
= spu_saved_regs_size ();
1573 total_size
= size
+ saved_regs_size
1574 + current_function_outgoing_args_size
1575 + current_function_pretend_args_size
;
1577 if (!current_function_is_leaf
1578 || current_function_calls_alloca
|| total_size
> 0)
1579 total_size
+= STACK_POINTER_OFFSET
;
1583 if (current_function_calls_alloca
)
1584 /* Load it from the back chain because our save_stack_block and
1585 restore_stack_block do nothing. */
1586 frame_emit_load (STACK_POINTER_REGNUM
, sp_reg
, 0);
1588 frame_emit_add_imm (sp_reg
, sp_reg
, total_size
, scratch_reg_0
);
1591 if (saved_regs_size
> 0)
1593 offset
= -current_function_pretend_args_size
;
1594 for (regno
= 0; regno
< FIRST_PSEUDO_REGISTER
; ++regno
)
1595 if (need_to_save_reg (regno
, 1))
1598 frame_emit_load (regno
, sp_reg
, offset
);
1603 if (!current_function_is_leaf
)
1604 frame_emit_load (LINK_REGISTER_REGNUM
, sp_reg
, 16);
1608 emit_insn (gen_rtx_USE
1609 (VOIDmode
, gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
)));
1610 jump
= emit_jump_insn (gen__return ());
1611 emit_barrier_after (jump
);
1614 emit_note (NOTE_INSN_DELETED
);
1618 spu_return_addr (int count
, rtx frame ATTRIBUTE_UNUSED
)
1622 /* This is inefficient because it ends up copying to a save-register
1623 which then gets saved even though $lr has already been saved. But
1624 it does generate better code for leaf functions and we don't need
1625 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1626 used for __builtin_return_address anyway, so maybe we don't care if
1627 it's inefficient. */
1628 return get_hard_reg_initial_val (Pmode
, LINK_REGISTER_REGNUM
);
1632 /* Given VAL, generate a constant appropriate for MODE.
1633 If MODE is a vector mode, every element will be VAL.
1634 For TImode, VAL will be zero extended to 128 bits. */
1636 spu_const (enum machine_mode mode
, HOST_WIDE_INT val
)
1642 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
1643 || GET_MODE_CLASS (mode
) == MODE_FLOAT
1644 || GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
1645 || GET_MODE_CLASS (mode
) == MODE_VECTOR_FLOAT
);
1647 if (GET_MODE_CLASS (mode
) == MODE_INT
)
1648 return immed_double_const (val
, 0, mode
);
1650 /* val is the bit representation of the float */
1651 if (GET_MODE_CLASS (mode
) == MODE_FLOAT
)
1652 return hwint_to_const_double (mode
, val
);
1654 if (GET_MODE_CLASS (mode
) == MODE_VECTOR_INT
)
1655 inner
= immed_double_const (val
, 0, GET_MODE_INNER (mode
));
1657 inner
= hwint_to_const_double (GET_MODE_INNER (mode
), val
);
1659 units
= GET_MODE_NUNITS (mode
);
1661 v
= rtvec_alloc (units
);
1663 for (i
= 0; i
< units
; ++i
)
1664 RTVEC_ELT (v
, i
) = inner
;
1666 return gen_rtx_CONST_VECTOR (mode
, v
);
1669 /* branch hint stuff */
1671 /* The hardware requires 8 insns between a hint and the branch it
1672 effects. This variable describes how many rtl instructions the
1673 compiler needs to see before inserting a hint. (FIXME: We should
1674 accept less and insert nops to enforce it because hinting is always
1675 profitable for performance, but we do need to be careful of code
1677 int spu_hint_dist
= (8 * 4);
1679 /* An array of these is used to propagate hints to predecessor blocks. */
1682 rtx prop_jump
; /* propagated from another block */
1683 basic_block bb
; /* the original block. */
1686 /* The special $hbr register is used to prevent the insn scheduler from
1687 moving hbr insns across instructions which invalidate them. It
1688 should only be used in a clobber, and this function searches for
1689 insns which clobber it. */
1691 insn_clobbers_hbr (rtx insn
)
1693 if (INSN_P (insn
) && GET_CODE (PATTERN (insn
)) == PARALLEL
)
1695 rtx parallel
= PATTERN (insn
);
1698 for (j
= XVECLEN (parallel
, 0) - 1; j
>= 0; j
--)
1700 clobber
= XVECEXP (parallel
, 0, j
);
1701 if (GET_CODE (clobber
) == CLOBBER
1702 && GET_CODE (XEXP (clobber
, 0)) == REG
1703 && REGNO (XEXP (clobber
, 0)) == HBR_REGNUM
)
1711 spu_emit_branch_hint (rtx before
, rtx branch
, rtx target
, int distance
)
1714 rtx hint
, insn
, prev
, next
;
1716 if (before
== 0 || branch
== 0 || target
== 0)
1723 branch_label
= gen_label_rtx ();
1724 LABEL_NUSES (branch_label
)++;
1725 LABEL_PRESERVE_P (branch_label
) = 1;
1726 insn
= emit_label_before (branch_label
, branch
);
1727 branch_label
= gen_rtx_LABEL_REF (VOIDmode
, branch_label
);
1729 /* If the previous insn is pipe0, make the hbr dual issue with it. If
1730 the current insn is pipe0, dual issue with it. */
1731 prev
= prev_active_insn (before
);
1732 if (prev
&& get_pipe (prev
) == 0)
1733 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
1734 else if (get_pipe (before
) == 0 && distance
> spu_hint_dist
)
1736 next
= next_active_insn (before
);
1737 hint
= emit_insn_after (gen_hbr (branch_label
, target
), before
);
1739 PUT_MODE (next
, TImode
);
1743 hint
= emit_insn_before (gen_hbr (branch_label
, target
), before
);
1744 PUT_MODE (hint
, TImode
);
1746 recog_memoized (hint
);
1749 /* Returns 0 if we don't want a hint for this branch. Otherwise return
1750 the rtx for the branch target. */
1752 get_branch_target (rtx branch
)
1754 if (GET_CODE (branch
) == JUMP_INSN
)
1758 /* Return statements */
1759 if (GET_CODE (PATTERN (branch
)) == RETURN
)
1760 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
1763 if (GET_CODE (PATTERN (branch
)) == ADDR_VEC
1764 || GET_CODE (PATTERN (branch
)) == ADDR_DIFF_VEC
)
1767 set
= single_set (branch
);
1768 src
= SET_SRC (set
);
1769 if (GET_CODE (SET_DEST (set
)) != PC
)
1772 if (GET_CODE (src
) == IF_THEN_ELSE
)
1775 rtx note
= find_reg_note (branch
, REG_BR_PROB
, 0);
1778 /* If the more probable case is not a fall through, then
1779 try a branch hint. */
1780 HOST_WIDE_INT prob
= INTVAL (XEXP (note
, 0));
1781 if (prob
> (REG_BR_PROB_BASE
* 6 / 10)
1782 && GET_CODE (XEXP (src
, 1)) != PC
)
1783 lab
= XEXP (src
, 1);
1784 else if (prob
< (REG_BR_PROB_BASE
* 4 / 10)
1785 && GET_CODE (XEXP (src
, 2)) != PC
)
1786 lab
= XEXP (src
, 2);
1790 if (GET_CODE (lab
) == RETURN
)
1791 return gen_rtx_REG (SImode
, LINK_REGISTER_REGNUM
);
1799 else if (GET_CODE (branch
) == CALL_INSN
)
1802 /* All of our call patterns are in a PARALLEL and the CALL is
1803 the first pattern in the PARALLEL. */
1804 if (GET_CODE (PATTERN (branch
)) != PARALLEL
)
1806 call
= XVECEXP (PATTERN (branch
), 0, 0);
1807 if (GET_CODE (call
) == SET
)
1808 call
= SET_SRC (call
);
1809 if (GET_CODE (call
) != CALL
)
1811 return XEXP (XEXP (call
, 0), 0);
1817 insert_branch_hints (void)
1819 struct spu_bb_info
*spu_bb_info
;
1820 rtx branch
, insn
, next
;
1821 rtx branch_target
= 0;
1822 int branch_addr
= 0, insn_addr
, head_addr
;
1827 (struct spu_bb_info
*) xcalloc (last_basic_block
+ 1,
1828 sizeof (struct spu_bb_info
));
1830 /* We need exact insn addresses and lengths. */
1831 shorten_branches (get_insns ());
1833 FOR_EACH_BB_REVERSE (bb
)
1835 head_addr
= INSN_ADDRESSES (INSN_UID (BB_HEAD (bb
)));
1837 if (spu_bb_info
[bb
->index
].prop_jump
)
1839 branch
= spu_bb_info
[bb
->index
].prop_jump
;
1840 branch_target
= get_branch_target (branch
);
1841 branch_addr
= INSN_ADDRESSES (INSN_UID (branch
));
1843 /* Search from end of a block to beginning. In this loop, find
1844 jumps which need a branch and emit them only when:
1845 - it's an indirect branch and we're at the insn which sets
1847 - we're at an insn that will invalidate the hint. e.g., a
1848 call, another hint insn, inline asm that clobbers $hbr, and
1849 some inlined operations (divmodsi4). Don't consider jumps
1850 because they are only at the end of a block and are
1851 considered when we are deciding whether to propagate
1852 - we're getting too far away from the branch. The hbr insns
1853 only have a signed 10 bit offset
1854 We go back as far as possible so the branch will be considered
1855 for propagation when we get to the beginning of the block. */
1857 for (insn
= BB_END (bb
); insn
; insn
= PREV_INSN (insn
))
1861 insn_addr
= INSN_ADDRESSES (INSN_UID (insn
));
1863 && ((GET_CODE (branch_target
) == REG
1864 && set_of (branch_target
, insn
) != NULL_RTX
)
1865 || insn_clobbers_hbr (insn
)
1866 || branch_addr
- insn_addr
> 600))
1868 int next_addr
= INSN_ADDRESSES (INSN_UID (next
));
1869 if (insn
!= BB_END (bb
)
1870 && branch_addr
- next_addr
>= spu_hint_dist
)
1874 "hint for %i in block %i before %i\n",
1875 INSN_UID (branch
), bb
->index
, INSN_UID (next
));
1876 spu_emit_branch_hint (next
, branch
, branch_target
,
1877 branch_addr
- next_addr
);
1882 /* JUMP_P will only be true at the end of a block. When
1883 branch is already set it means we've previously decided
1884 to propagate a hint for that branch into this block. */
1885 if (CALL_P (insn
) || (JUMP_P (insn
) && !branch
))
1888 if ((branch_target
= get_branch_target (insn
)))
1891 branch_addr
= insn_addr
;
1895 /* When a branch hint is emitted it will be inserted
1896 before "next". Make sure next is the beginning of a
1897 cycle to minimize impact on the scheduled insns. */
1898 if (GET_MODE (insn
) == TImode
)
1901 if (insn
== BB_HEAD (bb
))
1907 /* If we haven't emitted a hint for this branch yet, it might
1908 be profitable to emit it in one of the predecessor blocks,
1909 especially for loops. */
1911 basic_block prev
= 0, prop
= 0, prev2
= 0;
1912 int loop_exit
= 0, simple_loop
= 0;
1915 next_addr
= INSN_ADDRESSES (INSN_UID (next
));
1917 for (j
= 0; j
< EDGE_COUNT (bb
->preds
); j
++)
1918 if (EDGE_PRED (bb
, j
)->flags
& EDGE_FALLTHRU
)
1919 prev
= EDGE_PRED (bb
, j
)->src
;
1921 prev2
= EDGE_PRED (bb
, j
)->src
;
1923 for (j
= 0; j
< EDGE_COUNT (bb
->succs
); j
++)
1924 if (EDGE_SUCC (bb
, j
)->flags
& EDGE_LOOP_EXIT
)
1926 else if (EDGE_SUCC (bb
, j
)->dest
== bb
)
1929 /* If this branch is a loop exit then propagate to previous
1930 fallthru block. This catches the cases when it is a simple
1931 loop or when there is an initial branch into the loop. */
1932 if (prev
&& loop_exit
&& prev
->loop_depth
<= bb
->loop_depth
)
1935 /* If there is only one adjacent predecessor. Don't propagate
1936 outside this loop. This loop_depth test isn't perfect, but
1937 I'm not sure the loop_father member is valid at this point. */
1938 else if (prev
&& single_pred_p (bb
)
1939 && prev
->loop_depth
== bb
->loop_depth
)
1942 /* If this is the JOIN block of a simple IF-THEN then
1943 propogate the hint to the HEADER block. */
1944 else if (prev
&& prev2
1945 && EDGE_COUNT (bb
->preds
) == 2
1946 && EDGE_COUNT (prev
->preds
) == 1
1947 && EDGE_PRED (prev
, 0)->src
== prev2
1948 && prev2
->loop_depth
== bb
->loop_depth
1949 && GET_CODE (branch_target
) != REG
)
1952 /* Don't propagate when:
1953 - this is a simple loop and the hint would be too far
1954 - this is not a simple loop and there are 16 insns in
1956 - the predecessor block ends in a branch that will be
1958 - the predecessor block ends in an insn that invalidates
1962 && (bbend
= BB_END (prop
))
1963 && branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)) <
1964 (simple_loop
? 600 : 16 * 4) && get_branch_target (bbend
) == 0
1965 && (JUMP_P (bbend
) || !insn_clobbers_hbr (bbend
)))
1968 fprintf (dump_file
, "propagate from %i to %i (loop depth %i) "
1969 "for %i (loop_exit %i simple_loop %i dist %i)\n",
1970 bb
->index
, prop
->index
, bb
->loop_depth
,
1971 INSN_UID (branch
), loop_exit
, simple_loop
,
1972 branch_addr
- INSN_ADDRESSES (INSN_UID (bbend
)));
1974 spu_bb_info
[prop
->index
].prop_jump
= branch
;
1975 spu_bb_info
[prop
->index
].bb
= bb
;
1977 else if (next
&& branch_addr
- next_addr
>= spu_hint_dist
)
1980 fprintf (dump_file
, "hint for %i in block %i before %i\n",
1981 INSN_UID (branch
), bb
->index
, INSN_UID (next
));
1982 spu_emit_branch_hint (next
, branch
, branch_target
,
1983 branch_addr
- next_addr
);
1991 /* Emit a nop for INSN such that the two will dual issue. This assumes
1992 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
1993 We check for TImode to handle a MULTI1 insn which has dual issued its
1994 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
1997 emit_nop_for_insn (rtx insn
)
2001 p
= get_pipe (insn
);
2002 if (p
== 1 && GET_MODE (insn
) == TImode
)
2004 new_insn
= emit_insn_before (gen_nopn (GEN_INT (127)), insn
);
2005 PUT_MODE (new_insn
, TImode
);
2006 PUT_MODE (insn
, VOIDmode
);
2009 new_insn
= emit_insn_after (gen_lnop (), insn
);
2012 /* Insert nops in basic blocks to meet dual issue alignment
2017 rtx insn
, next_insn
, prev_insn
;
2021 /* This sets up INSN_ADDRESSES. */
2022 shorten_branches (get_insns ());
2024 /* Keep track of length added by nops. */
2028 for (insn
= get_insns (); insn
; insn
= next_insn
)
2030 next_insn
= next_active_insn (insn
);
2031 addr
= INSN_ADDRESSES (INSN_UID (insn
));
2032 if (GET_MODE (insn
) == TImode
2034 && GET_MODE (next_insn
) != TImode
2035 && ((addr
+ length
) & 7) != 0)
2037 /* prev_insn will always be set because the first insn is
2038 always 8-byte aligned. */
2039 emit_nop_for_insn (prev_insn
);
2047 spu_machine_dependent_reorg (void)
2051 if (TARGET_BRANCH_HINTS
)
2052 insert_branch_hints ();
2058 /* Insn scheduling routines, primarily for dual issue. */
2060 spu_sched_issue_rate (void)
2066 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED
,
2067 int verbose ATTRIBUTE_UNUSED
, rtx insn
,
2070 if (GET_CODE (PATTERN (insn
)) != USE
2071 && GET_CODE (PATTERN (insn
)) != CLOBBER
2072 && get_pipe (insn
) != -2)
2074 return can_issue_more
;
2081 /* Handle inline asm */
2082 if (INSN_CODE (insn
) == -1)
2084 t
= get_attr_type (insn
);
2100 case TYPE_IPREFETCH
:
2117 spu_sched_adjust_priority (rtx insn
, int pri
)
2119 int p
= get_pipe (insn
);
2120 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2122 if (GET_CODE (PATTERN (insn
)) == USE
2123 || GET_CODE (PATTERN (insn
)) == CLOBBER
2126 /* Schedule pipe0 insns early for greedier dual issue. */
2132 /* INSN is dependent on DEP_INSN. */
2134 spu_sched_adjust_cost (rtx insn
, rtx link ATTRIBUTE_UNUSED
,
2135 rtx dep_insn ATTRIBUTE_UNUSED
, int cost
)
2137 if (GET_CODE (insn
) == CALL_INSN
)
2139 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2140 scheduler makes every insn in a block anti-dependent on the final
2141 jump_insn. We adjust here so higher cost insns will get scheduled
2143 if (GET_CODE (insn
) == JUMP_INSN
&& REG_NOTE_KIND (link
) == REG_DEP_ANTI
)
2144 return INSN_COST (dep_insn
) - 3;
2148 /* Create a CONST_DOUBLE from a string. */
2150 spu_float_const (const char *string
, enum machine_mode mode
)
2152 REAL_VALUE_TYPE value
;
2153 value
= REAL_VALUE_ATOF (string
, mode
);
2154 return CONST_DOUBLE_FROM_REAL_VALUE (value
, mode
);
2157 /* Given a (CONST (PLUS (SYMBOL_REF) (CONST_INT))) return TRUE when the
2158 CONST_INT fits constraint 'K', i.e., is small. */
2160 legitimate_const (rtx x
, int aligned
)
2162 /* We can never know if the resulting address fits in 18 bits and can be
2163 loaded with ila. Instead we should use the HI and LO relocations to
2164 load a 32 bit address. */
2167 gcc_assert (GET_CODE (x
) == CONST
);
2169 if (GET_CODE (XEXP (x
, 0)) != PLUS
)
2171 sym
= XEXP (XEXP (x
, 0), 0);
2172 cst
= XEXP (XEXP (x
, 0), 1);
2173 if (GET_CODE (sym
) != SYMBOL_REF
|| GET_CODE (cst
) != CONST_INT
)
2175 if (aligned
&& ((INTVAL (cst
) & 15) != 0 || !ALIGNED_SYMBOL_REF_P (sym
)))
2177 return satisfies_constraint_K (cst
);
2181 spu_constant_address_p (rtx x
)
2183 return (GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == SYMBOL_REF
2184 || GET_CODE (x
) == CONST_INT
|| GET_CODE (x
) == CONST
2185 || GET_CODE (x
) == HIGH
);
2188 static enum spu_immediate
2189 which_immediate_load (HOST_WIDE_INT val
)
2191 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
2193 if (val
>= -0x8000 && val
<= 0x7fff)
2195 if (val
>= 0 && val
<= 0x3ffff)
2197 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
2199 if ((val
& 0xffff) == 0)
2206 immediate_load_p (rtx op
, enum machine_mode mode
)
2209 unsigned char arr
[16];
2211 if (GET_MODE (op
) != VOIDmode
)
2212 mode
= GET_MODE (op
);
2214 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2215 || GET_CODE (op
) == CONST_VECTOR
);
2217 /* V4SI with all identical symbols is valid. */
2218 if (mode
== V4SImode
2219 && GET_CODE (CONST_VECTOR_ELT (op
, 0)) == SYMBOL_REF
)
2220 return !TARGET_LARGE_MEM
&& !flag_pic
2221 && CONST_VECTOR_ELT (op
, 0) == CONST_VECTOR_ELT (op
, 1)
2222 && CONST_VECTOR_ELT (op
, 1) == CONST_VECTOR_ELT (op
, 2)
2223 && CONST_VECTOR_ELT (op
, 2) == CONST_VECTOR_ELT (op
, 3);
2225 constant_to_array (mode
, op
, arr
);
2227 /* Check that bytes are repeated. */
2228 for (i
= 4; i
< 16; i
+= 4)
2229 for (j
= 0; j
< 4; j
++)
2230 if (arr
[j
] != arr
[i
+ j
])
2233 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2234 val
= trunc_int_for_mode (val
, SImode
);
2236 return which_immediate_load (val
) != SPU_NONE
;
2239 static enum spu_immediate
2240 which_logical_immediate (HOST_WIDE_INT val
)
2242 gcc_assert (val
== trunc_int_for_mode (val
, SImode
));
2244 if (val
>= -0x200 && val
<= 0x1ff)
2246 if (val
>= 0 && val
<= 0xffff)
2248 if ((val
& 0xffff) == ((val
>> 16) & 0xffff))
2250 val
= trunc_int_for_mode (val
, HImode
);
2251 if (val
>= -0x200 && val
<= 0x1ff)
2253 if ((val
& 0xff) == ((val
>> 8) & 0xff))
2255 val
= trunc_int_for_mode (val
, QImode
);
2256 if (val
>= -0x200 && val
<= 0x1ff)
2264 logical_immediate_p (rtx op
, enum machine_mode mode
)
2267 unsigned char arr
[16];
2270 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2271 || GET_CODE (op
) == CONST_VECTOR
);
2273 if (GET_MODE (op
) != VOIDmode
)
2274 mode
= GET_MODE (op
);
2276 constant_to_array (mode
, op
, arr
);
2278 /* Check that bytes are repeated. */
2279 for (i
= 4; i
< 16; i
+= 4)
2280 for (j
= 0; j
< 4; j
++)
2281 if (arr
[j
] != arr
[i
+ j
])
2284 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2285 val
= trunc_int_for_mode (val
, SImode
);
2287 i
= which_logical_immediate (val
);
2288 return i
!= SPU_NONE
&& i
!= SPU_IOHL
;
2292 iohl_immediate_p (rtx op
, enum machine_mode mode
)
2295 unsigned char arr
[16];
2298 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2299 || GET_CODE (op
) == CONST_VECTOR
);
2301 if (GET_MODE (op
) != VOIDmode
)
2302 mode
= GET_MODE (op
);
2304 constant_to_array (mode
, op
, arr
);
2306 /* Check that bytes are repeated. */
2307 for (i
= 4; i
< 16; i
+= 4)
2308 for (j
= 0; j
< 4; j
++)
2309 if (arr
[j
] != arr
[i
+ j
])
2312 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
2313 val
= trunc_int_for_mode (val
, SImode
);
2315 return val
>= 0 && val
<= 0xffff;
2319 arith_immediate_p (rtx op
, enum machine_mode mode
,
2320 HOST_WIDE_INT low
, HOST_WIDE_INT high
)
2323 unsigned char arr
[16];
2326 gcc_assert (GET_CODE (op
) == CONST_INT
|| GET_CODE (op
) == CONST_DOUBLE
2327 || GET_CODE (op
) == CONST_VECTOR
);
2329 if (GET_MODE (op
) != VOIDmode
)
2330 mode
= GET_MODE (op
);
2332 constant_to_array (mode
, op
, arr
);
2334 if (VECTOR_MODE_P (mode
))
2335 mode
= GET_MODE_INNER (mode
);
2337 bytes
= GET_MODE_SIZE (mode
);
2338 mode
= mode_for_size (GET_MODE_BITSIZE (mode
), MODE_INT
, 0);
2340 /* Check that bytes are repeated. */
2341 for (i
= bytes
; i
< 16; i
+= bytes
)
2342 for (j
= 0; j
< bytes
; j
++)
2343 if (arr
[j
] != arr
[i
+ j
])
2347 for (j
= 1; j
< bytes
; j
++)
2348 val
= (val
<< 8) | arr
[j
];
2350 val
= trunc_int_for_mode (val
, mode
);
2352 return val
>= low
&& val
<= high
;
2356 - any 32 bit constant (SImode, SFmode)
2357 - any constant that can be generated with fsmbi (any mode)
2358 - a 64 bit constant where the high and low bits are identical
2360 - a 128 bit constant where the four 32 bit words match. */
2362 spu_legitimate_constant_p (rtx x
)
2364 unsigned char arr
[16];
2367 if (GET_CODE (x
) == HIGH
2368 || GET_CODE (x
) == CONST
2369 || GET_CODE (x
) == SYMBOL_REF
2370 || GET_CODE (x
) == LABEL_REF
)
2373 if (fsmbi_const_p (x
))
2376 if (GET_CODE (x
) == CONST_INT
)
2377 return (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0x7fffffffll
)
2378 || ((INTVAL (x
) >> 32) & 0xffffffffll
) == (INTVAL (x
) & 0xffffffffll
);
2380 if (GET_MODE (x
) == SFmode
)
2383 if (GET_MODE (x
) == DFmode
)
2385 HOST_WIDE_INT val
= const_double_to_hwint (x
);
2386 return ((val
>> 32) & 0xffffffffll
) == (val
& 0xffffffffll
);
2389 /* V4SI with all identical symbols is valid. */
2390 if (GET_MODE (x
) == V4SImode
2391 && (GET_CODE (CONST_VECTOR_ELT (x
, 0)) == SYMBOL_REF
2392 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == LABEL_REF
2393 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == CONST
2394 || GET_CODE (CONST_VECTOR_ELT (x
, 0)) == HIGH
))
2395 return CONST_VECTOR_ELT (x
, 0) == CONST_VECTOR_ELT (x
, 1)
2396 && CONST_VECTOR_ELT (x
, 1) == CONST_VECTOR_ELT (x
, 2)
2397 && CONST_VECTOR_ELT (x
, 2) == CONST_VECTOR_ELT (x
, 3);
2399 if (VECTOR_MODE_P (GET_MODE (x
)))
2400 for (i
= 0; i
< GET_MODE_NUNITS (GET_MODE (x
)); i
++)
2401 if (GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_INT
2402 && GET_CODE (CONST_VECTOR_ELT (x
, i
)) != CONST_DOUBLE
)
2405 constant_to_array (SImode
, x
, arr
);
2407 /* Check that bytes are repeated. */
2408 for (i
= 4; i
< 16; i
+= 4)
2409 for (j
= 0; j
< 4; j
++)
2410 if (arr
[j
] != arr
[i
+ j
])
2416 /* Valid address are:
2417 - symbol_ref, label_ref, const
2419 - reg + const, where either reg or const is 16 byte aligned
2420 - reg + reg, alignment doesn't matter
2421 The alignment matters in the reg+const case because lqd and stqd
2422 ignore the 4 least significant bits of the const. (TODO: It might be
2423 preferable to allow any alignment and fix it up when splitting.) */
2425 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED
,
2426 rtx x
, int reg_ok_strict
)
2428 if (mode
== TImode
&& GET_CODE (x
) == AND
2429 && GET_CODE (XEXP (x
, 1)) == CONST_INT
2430 && INTVAL (XEXP (x
, 1)) == (HOST_WIDE_INT
) -16)
2432 switch (GET_CODE (x
))
2436 return !TARGET_LARGE_MEM
;
2439 return !TARGET_LARGE_MEM
&& legitimate_const (x
, 0);
2442 return INTVAL (x
) >= 0 && INTVAL (x
) <= 0x3ffff;
2446 gcc_assert (GET_CODE (x
) == REG
);
2449 return INT_REG_OK_FOR_BASE_P (x
, reg_ok_strict
);
2454 rtx op0
= XEXP (x
, 0);
2455 rtx op1
= XEXP (x
, 1);
2456 if (GET_CODE (op0
) == SUBREG
)
2457 op0
= XEXP (op0
, 0);
2458 if (GET_CODE (op1
) == SUBREG
)
2459 op1
= XEXP (op1
, 0);
2460 /* We can't just accept any aligned register because CSE can
2461 change it to a register that is not marked aligned and then
2462 recog will fail. So we only accept frame registers because
2463 they will only be changed to other frame registers. */
2464 if (GET_CODE (op0
) == REG
2465 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
2466 && GET_CODE (op1
) == CONST_INT
2467 && INTVAL (op1
) >= -0x2000
2468 && INTVAL (op1
) <= 0x1fff
2469 && (REGNO_PTR_FRAME_P (REGNO (op0
)) || (INTVAL (op1
) & 15) == 0))
2471 if (GET_CODE (op0
) == REG
2472 && INT_REG_OK_FOR_BASE_P (op0
, reg_ok_strict
)
2473 && GET_CODE (op1
) == REG
2474 && INT_REG_OK_FOR_INDEX_P (op1
, reg_ok_strict
))
2485 /* When the address is reg + const_int, force the const_int into a
2488 spu_legitimize_address (rtx x
, rtx oldx ATTRIBUTE_UNUSED
,
2489 enum machine_mode mode
)
2492 /* Make sure both operands are registers. */
2493 if (GET_CODE (x
) == PLUS
)
2497 if (ALIGNED_SYMBOL_REF_P (op0
))
2499 op0
= force_reg (Pmode
, op0
);
2500 mark_reg_pointer (op0
, 128);
2502 else if (GET_CODE (op0
) != REG
)
2503 op0
= force_reg (Pmode
, op0
);
2504 if (ALIGNED_SYMBOL_REF_P (op1
))
2506 op1
= force_reg (Pmode
, op1
);
2507 mark_reg_pointer (op1
, 128);
2509 else if (GET_CODE (op1
) != REG
)
2510 op1
= force_reg (Pmode
, op1
);
2511 x
= gen_rtx_PLUS (Pmode
, op0
, op1
);
2512 if (spu_legitimate_address (mode
, x
, 0))
2518 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2519 struct attribute_spec.handler. */
2521 spu_handle_fndecl_attribute (tree
* node
,
2523 tree args ATTRIBUTE_UNUSED
,
2524 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
2526 if (TREE_CODE (*node
) != FUNCTION_DECL
)
2528 warning (0, "`%s' attribute only applies to functions",
2529 IDENTIFIER_POINTER (name
));
2530 *no_add_attrs
= true;
2536 /* Handle the "vector" attribute. */
2538 spu_handle_vector_attribute (tree
* node
, tree name
,
2539 tree args ATTRIBUTE_UNUSED
,
2540 int flags ATTRIBUTE_UNUSED
, bool * no_add_attrs
)
2542 tree type
= *node
, result
= NULL_TREE
;
2543 enum machine_mode mode
;
2546 while (POINTER_TYPE_P (type
)
2547 || TREE_CODE (type
) == FUNCTION_TYPE
2548 || TREE_CODE (type
) == METHOD_TYPE
|| TREE_CODE (type
) == ARRAY_TYPE
)
2549 type
= TREE_TYPE (type
);
2551 mode
= TYPE_MODE (type
);
2553 unsigned_p
= TYPE_UNSIGNED (type
);
2557 result
= (unsigned_p
? unsigned_V2DI_type_node
: V2DI_type_node
);
2560 result
= (unsigned_p
? unsigned_V4SI_type_node
: V4SI_type_node
);
2563 result
= (unsigned_p
? unsigned_V8HI_type_node
: V8HI_type_node
);
2566 result
= (unsigned_p
? unsigned_V16QI_type_node
: V16QI_type_node
);
2569 result
= V4SF_type_node
;
2572 result
= V2DF_type_node
;
2578 /* Propagate qualifiers attached to the element type
2579 onto the vector type. */
2580 if (result
&& result
!= type
&& TYPE_QUALS (type
))
2581 result
= build_qualified_type (result
, TYPE_QUALS (type
));
2583 *no_add_attrs
= true; /* No need to hang on to the attribute. */
2586 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name
));
2588 *node
= reconstruct_complex_type (*node
, result
);
2593 /* Return non-zero if FUNC is a naked function. */
2595 spu_naked_function_p (tree func
)
2599 if (TREE_CODE (func
) != FUNCTION_DECL
)
2602 a
= lookup_attribute ("naked", DECL_ATTRIBUTES (func
));
2603 return a
!= NULL_TREE
;
2607 spu_initial_elimination_offset (int from
, int to
)
2609 int saved_regs_size
= spu_saved_regs_size ();
2611 if (!current_function_is_leaf
|| current_function_outgoing_args_size
2612 || get_frame_size () || saved_regs_size
)
2613 sp_offset
= STACK_POINTER_OFFSET
;
2614 if (from
== FRAME_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
2615 return (sp_offset
+ current_function_outgoing_args_size
);
2616 else if (from
== FRAME_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
2618 else if (from
== ARG_POINTER_REGNUM
&& to
== STACK_POINTER_REGNUM
)
2619 return sp_offset
+ current_function_outgoing_args_size
2620 + get_frame_size () + saved_regs_size
+ STACK_POINTER_OFFSET
;
2621 else if (from
== ARG_POINTER_REGNUM
&& to
== HARD_FRAME_POINTER_REGNUM
)
2622 return get_frame_size () + saved_regs_size
+ sp_offset
;
2627 spu_function_value (tree type
, tree func ATTRIBUTE_UNUSED
)
2629 enum machine_mode mode
= TYPE_MODE (type
);
2630 int byte_size
= ((mode
== BLKmode
)
2631 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2633 /* Make sure small structs are left justified in a register. */
2634 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
2635 && byte_size
<= UNITS_PER_WORD
* MAX_REGISTER_RETURN
&& byte_size
> 0)
2637 enum machine_mode smode
;
2640 int nregs
= (byte_size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
;
2641 int n
= byte_size
/ UNITS_PER_WORD
;
2642 v
= rtvec_alloc (nregs
);
2643 for (i
= 0; i
< n
; i
++)
2645 RTVEC_ELT (v
, i
) = gen_rtx_EXPR_LIST (VOIDmode
,
2646 gen_rtx_REG (TImode
,
2649 GEN_INT (UNITS_PER_WORD
* i
));
2650 byte_size
-= UNITS_PER_WORD
;
2658 smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
2660 gen_rtx_EXPR_LIST (VOIDmode
,
2661 gen_rtx_REG (smode
, FIRST_RETURN_REGNUM
+ n
),
2662 GEN_INT (UNITS_PER_WORD
* n
));
2664 return gen_rtx_PARALLEL (mode
, v
);
2666 return gen_rtx_REG (mode
, FIRST_RETURN_REGNUM
);
2670 spu_function_arg (CUMULATIVE_ARGS cum
,
2671 enum machine_mode mode
,
2672 tree type
, int named ATTRIBUTE_UNUSED
)
2676 if (cum
>= MAX_REGISTER_ARGS
)
2679 byte_size
= ((mode
== BLKmode
)
2680 ? int_size_in_bytes (type
) : GET_MODE_SIZE (mode
));
2682 /* The ABI does not allow parameters to be passed partially in
2683 reg and partially in stack. */
2684 if ((cum
+ (byte_size
+ 15) / 16) > MAX_REGISTER_ARGS
)
2687 /* Make sure small structs are left justified in a register. */
2688 if ((mode
== BLKmode
|| (type
&& AGGREGATE_TYPE_P (type
)))
2689 && byte_size
< UNITS_PER_WORD
&& byte_size
> 0)
2691 enum machine_mode smode
;
2695 smode
= smallest_mode_for_size (byte_size
* BITS_PER_UNIT
, MODE_INT
);
2696 gr_reg
= gen_rtx_EXPR_LIST (VOIDmode
,
2697 gen_rtx_REG (smode
, FIRST_ARG_REGNUM
+ cum
),
2699 return gen_rtx_PARALLEL (mode
, gen_rtvec (1, gr_reg
));
2702 return gen_rtx_REG (mode
, FIRST_ARG_REGNUM
+ cum
);
2705 /* Variable sized types are passed by reference. */
2707 spu_pass_by_reference (CUMULATIVE_ARGS
* cum ATTRIBUTE_UNUSED
,
2708 enum machine_mode mode ATTRIBUTE_UNUSED
,
2709 tree type
, bool named ATTRIBUTE_UNUSED
)
2711 return type
&& TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
;
2717 /* Create and return the va_list datatype.
2719 On SPU, va_list is an array type equivalent to
2721 typedef struct __va_list_tag
2723 void *__args __attribute__((__aligned(16)));
2724 void *__skip __attribute__((__aligned(16)));
2728 where __args points to the arg that will be returned by the next
2729 va_arg(), and __skip points to the previous stack frame such that
2730 when __args == __skip we should advance __args by 32 bytes. */
2732 spu_build_builtin_va_list (void)
2734 tree f_args
, f_skip
, record
, type_decl
;
2737 record
= (*lang_hooks
.types
.make_type
) (RECORD_TYPE
);
2740 build_decl (TYPE_DECL
, get_identifier ("__va_list_tag"), record
);
2742 f_args
= build_decl (FIELD_DECL
, get_identifier ("__args"), ptr_type_node
);
2743 f_skip
= build_decl (FIELD_DECL
, get_identifier ("__skip"), ptr_type_node
);
2745 DECL_FIELD_CONTEXT (f_args
) = record
;
2746 DECL_ALIGN (f_args
) = 128;
2747 DECL_USER_ALIGN (f_args
) = 1;
2749 DECL_FIELD_CONTEXT (f_skip
) = record
;
2750 DECL_ALIGN (f_skip
) = 128;
2751 DECL_USER_ALIGN (f_skip
) = 1;
2753 TREE_CHAIN (record
) = type_decl
;
2754 TYPE_NAME (record
) = type_decl
;
2755 TYPE_FIELDS (record
) = f_args
;
2756 TREE_CHAIN (f_args
) = f_skip
;
2758 /* We know this is being padded and we want it too. It is an internal
2759 type so hide the warnings from the user. */
2761 warn_padded
= false;
2763 layout_type (record
);
2767 /* The correct type is an array type of one element. */
2768 return build_array_type (record
, build_index_type (size_zero_node
));
2771 /* Implement va_start by filling the va_list structure VALIST.
2772 NEXTARG points to the first anonymous stack argument.
2774 The following global variables are used to initialize
2775 the va_list structure:
2777 current_function_args_info;
2778 the CUMULATIVE_ARGS for this function
2780 current_function_arg_offset_rtx:
2781 holds the offset of the first anonymous stack argument
2782 (relative to the virtual arg pointer). */
2785 spu_va_start (tree valist
, rtx nextarg
)
2787 tree f_args
, f_skip
;
2790 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2791 f_skip
= TREE_CHAIN (f_args
);
2793 valist
= build_va_arg_indirect_ref (valist
);
2795 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
2797 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
2799 /* Find the __args area. */
2800 t
= make_tree (TREE_TYPE (args
), nextarg
);
2801 if (current_function_pretend_args_size
> 0)
2802 t
= build2 (PLUS_EXPR
, TREE_TYPE (args
), t
,
2803 build_int_cst (integer_type_node
, -STACK_POINTER_OFFSET
));
2804 t
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, t
);
2805 TREE_SIDE_EFFECTS (t
) = 1;
2806 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2808 /* Find the __skip area. */
2809 t
= make_tree (TREE_TYPE (skip
), virtual_incoming_args_rtx
);
2810 t
= build2 (PLUS_EXPR
, TREE_TYPE (skip
), t
,
2811 build_int_cst (integer_type_node
,
2812 (current_function_pretend_args_size
2813 - STACK_POINTER_OFFSET
)));
2814 t
= build2 (MODIFY_EXPR
, TREE_TYPE (skip
), skip
, t
);
2815 TREE_SIDE_EFFECTS (t
) = 1;
2816 expand_expr (t
, const0_rtx
, VOIDmode
, EXPAND_NORMAL
);
2819 /* Gimplify va_arg by updating the va_list structure
2820 VALIST as required to retrieve an argument of type
2821 TYPE, and returning that argument.
2823 ret = va_arg(VALIST, TYPE);
2825 generates code equivalent to:
2827 paddedsize = (sizeof(TYPE) + 15) & -16;
2828 if (VALIST.__args + paddedsize > VALIST.__skip
2829 && VALIST.__args <= VALIST.__skip)
2830 addr = VALIST.__skip + 32;
2832 addr = VALIST.__args;
2833 VALIST.__args = addr + paddedsize;
2834 ret = *(TYPE *)addr;
2837 spu_gimplify_va_arg_expr (tree valist
, tree type
, tree
* pre_p
,
2838 tree
* post_p ATTRIBUTE_UNUSED
)
2840 tree f_args
, f_skip
;
2842 HOST_WIDE_INT size
, rsize
;
2843 tree paddedsize
, addr
, tmp
;
2844 bool pass_by_reference_p
;
2846 f_args
= TYPE_FIELDS (TREE_TYPE (va_list_type_node
));
2847 f_skip
= TREE_CHAIN (f_args
);
2849 valist
= build1 (INDIRECT_REF
, TREE_TYPE (TREE_TYPE (valist
)), valist
);
2851 build3 (COMPONENT_REF
, TREE_TYPE (f_args
), valist
, f_args
, NULL_TREE
);
2853 build3 (COMPONENT_REF
, TREE_TYPE (f_skip
), valist
, f_skip
, NULL_TREE
);
2855 addr
= create_tmp_var (ptr_type_node
, "va_arg");
2856 DECL_POINTER_ALIAS_SET (addr
) = get_varargs_alias_set ();
2858 /* if an object is dynamically sized, a pointer to it is passed
2859 instead of the object itself. */
2860 pass_by_reference_p
= spu_pass_by_reference (NULL
, TYPE_MODE (type
), type
,
2862 if (pass_by_reference_p
)
2863 type
= build_pointer_type (type
);
2864 size
= int_size_in_bytes (type
);
2865 rsize
= ((size
+ UNITS_PER_WORD
- 1) / UNITS_PER_WORD
) * UNITS_PER_WORD
;
2867 /* build conditional expression to calculate addr. The expression
2868 will be gimplified later. */
2869 paddedsize
= fold_convert (ptr_type_node
, size_int (rsize
));
2870 tmp
= build2 (PLUS_EXPR
, ptr_type_node
, args
, paddedsize
);
2871 tmp
= build2 (TRUTH_AND_EXPR
, boolean_type_node
,
2872 build2 (GT_EXPR
, boolean_type_node
, tmp
, skip
),
2873 build2 (LE_EXPR
, boolean_type_node
, args
, skip
));
2875 tmp
= build3 (COND_EXPR
, ptr_type_node
, tmp
,
2876 build2 (PLUS_EXPR
, ptr_type_node
, skip
,
2877 fold_convert (ptr_type_node
, size_int (32))), args
);
2879 tmp
= build2 (MODIFY_EXPR
, ptr_type_node
, addr
, tmp
);
2880 gimplify_and_add (tmp
, pre_p
);
2882 /* update VALIST.__args */
2883 tmp
= build2 (PLUS_EXPR
, ptr_type_node
, addr
, paddedsize
);
2884 tmp
= build2 (MODIFY_EXPR
, TREE_TYPE (args
), args
, tmp
);
2885 gimplify_and_add (tmp
, pre_p
);
2887 addr
= fold_convert (build_pointer_type (type
), addr
);
2889 if (pass_by_reference_p
)
2890 addr
= build_va_arg_indirect_ref (addr
);
2892 return build_va_arg_indirect_ref (addr
);
2895 /* Save parameter registers starting with the register that corresponds
2896 to the first unnamed parameters. If the first unnamed parameter is
2897 in the stack then save no registers. Set pretend_args_size to the
2898 amount of space needed to save the registers. */
2900 spu_setup_incoming_varargs (CUMULATIVE_ARGS
* cum
, enum machine_mode mode
,
2901 tree type
, int *pretend_size
, int no_rtl
)
2910 /* cum currently points to the last named argument, we want to
2911 start at the next argument. */
2912 FUNCTION_ARG_ADVANCE (ncum
, mode
, type
, 1);
2914 offset
= -STACK_POINTER_OFFSET
;
2915 for (regno
= ncum
; regno
< MAX_REGISTER_ARGS
; regno
++)
2917 tmp
= gen_frame_mem (V4SImode
,
2918 plus_constant (virtual_incoming_args_rtx
,
2920 emit_move_insn (tmp
,
2921 gen_rtx_REG (V4SImode
, FIRST_ARG_REGNUM
+ regno
));
2924 *pretend_size
= offset
+ STACK_POINTER_OFFSET
;
2929 spu_conditional_register_usage (void)
2933 fixed_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
2934 call_used_regs
[PIC_OFFSET_TABLE_REGNUM
] = 1;
2936 global_regs
[INTR_REGNUM
] = 1;
2939 /* This is called to decide when we can simplify a load instruction. We
2940 must only return true for registers which we know will always be
2941 aligned. Taking into account that CSE might replace this reg with
2942 another one that has not been marked aligned.
2943 So this is really only true for frame, stack and virtual registers,
2944 which we know are always aligned and should not be adversely effected
2947 regno_aligned_for_load (int regno
)
2949 return regno
== FRAME_POINTER_REGNUM
2950 || regno
== HARD_FRAME_POINTER_REGNUM
2951 || regno
== STACK_POINTER_REGNUM
2952 || (regno
>= FIRST_VIRTUAL_REGISTER
&& regno
<= LAST_VIRTUAL_REGISTER
);
2955 /* Return TRUE when mem is known to be 16-byte aligned. */
2957 aligned_mem_p (rtx mem
)
2959 if (MEM_ALIGN (mem
) >= 128)
2961 if (GET_MODE_SIZE (GET_MODE (mem
)) >= 16)
2963 if (GET_CODE (XEXP (mem
, 0)) == PLUS
)
2965 rtx p0
= XEXP (XEXP (mem
, 0), 0);
2966 rtx p1
= XEXP (XEXP (mem
, 0), 1);
2967 if (regno_aligned_for_load (REGNO (p0
)))
2969 if (GET_CODE (p1
) == REG
&& regno_aligned_for_load (REGNO (p1
)))
2971 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
2975 else if (GET_CODE (XEXP (mem
, 0)) == REG
)
2977 if (regno_aligned_for_load (REGNO (XEXP (mem
, 0))))
2980 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem
, 0)))
2982 else if (GET_CODE (XEXP (mem
, 0)) == CONST
)
2984 rtx p0
= XEXP (XEXP (XEXP (mem
, 0), 0), 0);
2985 rtx p1
= XEXP (XEXP (XEXP (mem
, 0), 0), 1);
2986 if (GET_CODE (p0
) == SYMBOL_REF
2987 && GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15) == 0)
2993 /* Return TRUE if we are certain the mem refers to a complete object
2994 which is both 16-byte aligned and padded to a 16-byte boundary. This
2995 would make it safe to store with a single instruction.
2996 We guarantee the alignment and padding for static objects by aligning
2997 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
2998 FIXME: We currently cannot guarantee this for objects on the stack
2999 because assign_parm_setup_stack calls assign_stack_local with the
3000 alignment of the parameter mode and in that case the alignment never
3001 gets adjusted by LOCAL_ALIGNMENT. */
3003 store_with_one_insn_p (rtx mem
)
3005 rtx addr
= XEXP (mem
, 0);
3006 if (GET_MODE (mem
) == BLKmode
)
3008 /* Only static objects. */
3009 if (GET_CODE (addr
) == SYMBOL_REF
)
3011 /* We use the associated declaration to make sure the access is
3012 referring to the whole object.
3013 We check both MEM_EXPR and and SYMBOL_REF_DECL. I'm not sure
3014 if it is necessary. Will there be cases where one exists, and
3015 the other does not? Will there be cases where both exist, but
3016 have different types? */
3017 tree decl
= MEM_EXPR (mem
);
3019 && TREE_CODE (decl
) == VAR_DECL
3020 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
3022 decl
= SYMBOL_REF_DECL (addr
);
3024 && TREE_CODE (decl
) == VAR_DECL
3025 && GET_MODE (mem
) == TYPE_MODE (TREE_TYPE (decl
)))
3032 spu_expand_mov (rtx
* ops
, enum machine_mode mode
)
3034 if (GET_CODE (ops
[0]) == SUBREG
&& !valid_subreg (ops
[0]))
3037 if (GET_CODE (ops
[1]) == SUBREG
&& !valid_subreg (ops
[1]))
3039 rtx from
= SUBREG_REG (ops
[1]);
3040 enum machine_mode imode
= GET_MODE (from
);
3042 gcc_assert (GET_MODE_CLASS (mode
) == MODE_INT
3043 && GET_MODE_CLASS (imode
) == MODE_INT
3044 && subreg_lowpart_p (ops
[1]));
3046 if (GET_MODE_SIZE (imode
) < 4)
3048 from
= gen_rtx_SUBREG (SImode
, from
, 0);
3052 if (GET_MODE_SIZE (mode
) < GET_MODE_SIZE (imode
))
3054 enum insn_code icode
= trunc_optab
->handlers
[mode
][imode
].insn_code
;
3055 emit_insn (GEN_FCN (icode
) (ops
[0], from
));
3058 emit_insn (gen_extend_insn (ops
[0], from
, mode
, imode
, 1));
3062 /* At least one of the operands needs to be a register. */
3063 if ((reload_in_progress
| reload_completed
) == 0
3064 && !register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
3066 rtx temp
= force_reg (mode
, ops
[1]);
3067 emit_move_insn (ops
[0], temp
);
3070 if (reload_in_progress
|| reload_completed
)
3072 enum machine_mode mode
= GET_MODE (ops
[0]);
3073 if (GET_CODE (ops
[1]) == CONST_INT
3074 && (mode
== DImode
|| mode
== TImode
)
3075 && ((INTVAL (ops
[1]) >> 32) & 0xffffffffll
) !=
3076 (INTVAL (ops
[1]) & 0xffffffffll
))
3078 rtx mem
= force_const_mem (mode
, ops
[1]);
3079 if (TARGET_LARGE_MEM
)
3081 rtx addr
= gen_rtx_REG (Pmode
, REGNO (ops
[0]));
3082 emit_move_insn (addr
, XEXP (mem
, 0));
3083 mem
= replace_equiv_address (mem
, addr
);
3085 emit_move_insn (ops
[0], mem
);
3088 else if ((GET_CODE (ops
[1]) == CONST_INT
3089 || GET_CODE (ops
[1]) == CONST_DOUBLE
3090 || GET_CODE (ops
[1]) == CONST_VECTOR
)
3091 && !immediate_load_p (ops
[1], mode
)
3092 && !fsmbi_const_p (ops
[1]))
3094 unsigned char arrlo
[16];
3095 unsigned char arrhi
[16];
3096 rtx to
= ops
[0], hi
, lo
;
3098 constant_to_array (mode
, ops
[1], arrhi
);
3099 for (i
= 0; i
< 16; i
+= 4)
3101 arrlo
[i
+ 2] = arrhi
[i
+ 2];
3102 arrlo
[i
+ 3] = arrhi
[i
+ 3];
3103 arrlo
[i
+ 0] = arrlo
[i
+ 1] = 0;
3104 arrhi
[i
+ 2] = arrhi
[i
+ 3] = 0;
3108 to
= spu_gen_subreg (SImode
, ops
[0]);
3111 else if (mode
== V4SFmode
)
3113 to
= spu_gen_subreg (V4SImode
, ops
[0]);
3116 hi
= array_to_constant (mode
, arrhi
);
3117 lo
= array_to_constant (mode
, arrlo
);
3118 emit_move_insn (to
, hi
);
3119 emit_insn (gen_rtx_SET (VOIDmode
, to
, gen_rtx_IOR (mode
, to
, lo
)));
3126 if (GET_CODE (ops
[0]) == MEM
)
3128 if (!spu_valid_move (ops
))
3130 emit_insn (gen_store (ops
[0], ops
[1], gen_reg_rtx (TImode
),
3131 gen_reg_rtx (TImode
)));
3135 else if (GET_CODE (ops
[1]) == MEM
)
3137 if (!spu_valid_move (ops
))
3140 (ops
[0], ops
[1], gen_reg_rtx (TImode
),
3141 gen_reg_rtx (SImode
)));
3145 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3147 if (GET_CODE (ops
[1]) == CONST_INT
)
3149 HOST_WIDE_INT val
= trunc_int_for_mode (INTVAL (ops
[1]), mode
);
3150 if (val
!= INTVAL (ops
[1]))
3152 emit_move_insn (ops
[0], GEN_INT (val
));
3163 /* For now, only frame registers are known to be aligned at all times.
3164 We can't trust REGNO_POINTER_ALIGN because optimization will move
3165 registers around, potentially changing an "aligned" register in an
3166 address to an unaligned register, which would result in an invalid
3168 int regno
= REGNO (reg
);
3169 return REGNO_PTR_FRAME_P (regno
) ? REGNO_POINTER_ALIGN (regno
) : 1;
3173 spu_split_load (rtx
* ops
)
3175 enum machine_mode mode
= GET_MODE (ops
[0]);
3176 rtx addr
, load
, rot
, mem
, p0
, p1
;
3179 addr
= XEXP (ops
[1], 0);
3183 if (GET_CODE (addr
) == PLUS
)
3186 aligned reg + aligned reg => lqx
3187 aligned reg + unaligned reg => lqx, rotqby
3188 aligned reg + aligned const => lqd
3189 aligned reg + unaligned const => lqd, rotqbyi
3190 unaligned reg + aligned reg => lqx, rotqby
3191 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3192 unaligned reg + aligned const => lqd, rotqby
3193 unaligned reg + unaligned const -> not allowed by legitimate address
3195 p0
= XEXP (addr
, 0);
3196 p1
= XEXP (addr
, 1);
3197 if (reg_align (p0
) < 128)
3199 if (GET_CODE (p1
) == REG
&& reg_align (p1
) < 128)
3201 emit_insn (gen_addsi3 (ops
[3], p0
, p1
));
3209 if (GET_CODE (p1
) == CONST_INT
&& (INTVAL (p1
) & 15))
3211 rot_amt
= INTVAL (p1
) & 15;
3212 p1
= GEN_INT (INTVAL (p1
) & -16);
3213 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
3215 else if (GET_CODE (p1
) == REG
&& reg_align (p1
) < 128)
3219 else if (GET_CODE (addr
) == REG
)
3221 if (reg_align (addr
) < 128)
3224 else if (GET_CODE (addr
) == CONST
)
3226 if (GET_CODE (XEXP (addr
, 0)) == PLUS
3227 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
3228 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
3230 rot_amt
= INTVAL (XEXP (XEXP (addr
, 0), 1));
3232 addr
= gen_rtx_CONST (Pmode
,
3233 gen_rtx_PLUS (Pmode
,
3234 XEXP (XEXP (addr
, 0), 0),
3235 GEN_INT (rot_amt
& -16)));
3237 addr
= XEXP (XEXP (addr
, 0), 0);
3242 else if (GET_CODE (addr
) == CONST_INT
)
3244 rot_amt
= INTVAL (addr
);
3245 addr
= GEN_INT (rot_amt
& -16);
3247 else if (!ALIGNED_SYMBOL_REF_P (addr
))
3250 if (GET_MODE_SIZE (mode
) < 4)
3251 rot_amt
+= GET_MODE_SIZE (mode
) - 4;
3257 emit_insn (gen_addsi3 (ops
[3], rot
, GEN_INT (rot_amt
)));
3264 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
3265 mem
= change_address (ops
[1], TImode
, addr
);
3267 emit_insn (gen_movti (load
, mem
));
3270 emit_insn (gen_rotqby_ti (load
, load
, rot
));
3272 emit_insn (gen_rotlti3 (load
, load
, GEN_INT (rot_amt
* 8)));
3274 if (reload_completed
)
3275 emit_move_insn (ops
[0], gen_rtx_REG (GET_MODE (ops
[0]), REGNO (load
)));
3277 emit_insn (gen_spu_convert (ops
[0], load
));
3281 spu_split_store (rtx
* ops
)
3283 enum machine_mode mode
= GET_MODE (ops
[0]);
3286 rtx addr
, p0
, p1
, p1_lo
, smem
;
3290 addr
= XEXP (ops
[0], 0);
3292 if (GET_CODE (addr
) == PLUS
)
3295 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3296 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3297 aligned reg + aligned const => lqd, c?d, shuf, stqx
3298 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3299 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3300 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3301 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3302 unaligned reg + unaligned const -> not allowed by legitimate address
3305 p0
= XEXP (addr
, 0);
3306 p1
= p1_lo
= XEXP (addr
, 1);
3307 if (GET_CODE (p0
) == REG
&& GET_CODE (p1
) == CONST_INT
)
3309 p1_lo
= GEN_INT (INTVAL (p1
) & 15);
3310 p1
= GEN_INT (INTVAL (p1
) & -16);
3311 addr
= gen_rtx_PLUS (SImode
, p0
, p1
);
3314 else if (GET_CODE (addr
) == REG
)
3318 p1
= p1_lo
= const0_rtx
;
3323 p0
= gen_rtx_REG (SImode
, STACK_POINTER_REGNUM
);
3324 p1
= 0; /* aform doesn't use p1 */
3326 if (ALIGNED_SYMBOL_REF_P (addr
))
3328 else if (GET_CODE (addr
) == CONST
)
3330 if (GET_CODE (XEXP (addr
, 0)) == PLUS
3331 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr
, 0), 0))
3332 && GET_CODE (XEXP (XEXP (addr
, 0), 1)) == CONST_INT
)
3334 HOST_WIDE_INT v
= INTVAL (XEXP (XEXP (addr
, 0), 1));
3336 addr
= gen_rtx_CONST (Pmode
,
3337 gen_rtx_PLUS (Pmode
,
3338 XEXP (XEXP (addr
, 0), 0),
3339 GEN_INT (v
& -16)));
3341 addr
= XEXP (XEXP (addr
, 0), 0);
3342 p1_lo
= GEN_INT (v
& 15);
3345 else if (GET_CODE (addr
) == CONST_INT
)
3347 p1_lo
= GEN_INT (INTVAL (addr
) & 15);
3348 addr
= GEN_INT (INTVAL (addr
) & -16);
3352 addr
= gen_rtx_AND (SImode
, copy_rtx (addr
), GEN_INT (-16));
3354 scalar
= store_with_one_insn_p (ops
[0]);
3357 /* We could copy the flags from the ops[0] MEM to mem here,
3358 We don't because we want this load to be optimized away if
3359 possible, and copying the flags will prevent that in certain
3360 cases, e.g. consider the volatile flag. */
3362 rtx lmem
= change_address (ops
[0], TImode
, copy_rtx (addr
));
3363 set_mem_alias_set (lmem
, 0);
3364 emit_insn (gen_movti (reg
, lmem
));
3366 if (!p0
|| reg_align (p0
) >= 128)
3367 p0
= stack_pointer_rtx
;
3371 emit_insn (gen_cpat (pat
, p0
, p1_lo
, GEN_INT (GET_MODE_SIZE (mode
))));
3372 emit_insn (gen_shufb (reg
, ops
[1], reg
, pat
));
3374 else if (reload_completed
)
3376 if (GET_CODE (ops
[1]) == REG
)
3377 emit_move_insn (reg
, gen_rtx_REG (GET_MODE (reg
), REGNO (ops
[1])));
3378 else if (GET_CODE (ops
[1]) == SUBREG
)
3379 emit_move_insn (reg
,
3380 gen_rtx_REG (GET_MODE (reg
),
3381 REGNO (SUBREG_REG (ops
[1]))));
3387 if (GET_CODE (ops
[1]) == REG
)
3388 emit_insn (gen_spu_convert (reg
, ops
[1]));
3389 else if (GET_CODE (ops
[1]) == SUBREG
)
3390 emit_insn (gen_spu_convert (reg
, SUBREG_REG (ops
[1])));
3395 if (GET_MODE_SIZE (mode
) < 4 && scalar
)
3396 emit_insn (gen_shlqby_ti
3397 (reg
, reg
, GEN_INT (4 - GET_MODE_SIZE (mode
))));
3399 smem
= change_address (ops
[0], TImode
, addr
);
3400 /* We can't use the previous alias set because the memory has changed
3401 size and can potentially overlap objects of other types. */
3402 set_mem_alias_set (smem
, 0);
3404 emit_insn (gen_movti (smem
, reg
));
3407 /* Return TRUE if X is MEM which is a struct member reference
3408 and the member can safely be loaded and stored with a single
3409 instruction because it is padded. */
3411 mem_is_padded_component_ref (rtx x
)
3413 tree t
= MEM_EXPR (x
);
3415 if (!t
|| TREE_CODE (t
) != COMPONENT_REF
)
3417 t
= TREE_OPERAND (t
, 1);
3418 if (!t
|| TREE_CODE (t
) != FIELD_DECL
3419 || DECL_ALIGN (t
) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t
)))
3421 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3422 r
= DECL_FIELD_CONTEXT (t
);
3423 if (!r
|| TREE_CODE (r
) != RECORD_TYPE
)
3425 /* Make sure they are the same mode */
3426 if (GET_MODE (x
) != TYPE_MODE (TREE_TYPE (t
)))
3428 /* If there are no following fields then the field alignment assures
3429 the structure is padded to the alignment which means this field is
3431 if (TREE_CHAIN (t
) == 0)
3433 /* If the following field is also aligned then this field will be
3436 if (TREE_CODE (t
) == FIELD_DECL
&& DECL_ALIGN (t
) >= 128)
3442 spu_valid_move (rtx
* ops
)
3444 enum machine_mode mode
= GET_MODE (ops
[0]);
3445 if (!register_operand (ops
[0], mode
) && !register_operand (ops
[1], mode
))
3448 /* init_expr_once tries to recog against load and store insns to set
3449 the direct_load[] and direct_store[] arrays. We always want to
3450 consider those loads and stores valid. init_expr_once is called in
3451 the context of a dummy function which does not have a decl. */
3452 if (cfun
->decl
== 0)
3455 /* Don't allows loads/stores which would require more than 1 insn.
3456 During and after reload we assume loads and stores only take 1
3458 if (GET_MODE_SIZE (mode
) < 16 && !reload_in_progress
&& !reload_completed
)
3460 if (GET_CODE (ops
[0]) == MEM
3461 && (GET_MODE_SIZE (mode
) < 4
3462 || !(store_with_one_insn_p (ops
[0])
3463 || mem_is_padded_component_ref (ops
[0]))))
3465 if (GET_CODE (ops
[1]) == MEM
3466 && (GET_MODE_SIZE (mode
) < 4 || !aligned_mem_p (ops
[1])))
3472 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3473 can be generated using the fsmbi instruction. */
3475 fsmbi_const_p (rtx x
)
3477 enum machine_mode mode
;
3478 unsigned char arr
[16];
3481 /* We can always choose DImode for CONST_INT because the high bits of
3482 an SImode will always be all 1s, i.e., valid for fsmbi. */
3483 mode
= GET_CODE (x
) == CONST_INT
? DImode
: GET_MODE (x
);
3484 constant_to_array (mode
, x
, arr
);
3486 for (i
= 0; i
< 16; i
++)
3487 if (arr
[i
] != 0 && arr
[i
] != 0xff)
3492 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3493 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3494 than 16 bytes, the value is repeated across the rest of the array. */
3496 constant_to_array (enum machine_mode mode
, rtx x
, unsigned char arr
[16])
3501 memset (arr
, 0, 16);
3502 mode
= GET_MODE (x
) != VOIDmode
? GET_MODE (x
) : mode
;
3503 if (GET_CODE (x
) == CONST_INT
3504 || (GET_CODE (x
) == CONST_DOUBLE
3505 && (mode
== SFmode
|| mode
== DFmode
)))
3507 gcc_assert (mode
!= VOIDmode
&& mode
!= BLKmode
);
3509 if (GET_CODE (x
) == CONST_DOUBLE
)
3510 val
= const_double_to_hwint (x
);
3513 first
= GET_MODE_SIZE (mode
) - 1;
3514 for (i
= first
; i
>= 0; i
--)
3516 arr
[i
] = val
& 0xff;
3519 /* Splat the constant across the whole array. */
3520 for (j
= 0, i
= first
+ 1; i
< 16; i
++)
3523 j
= (j
== first
) ? 0 : j
+ 1;
3526 else if (GET_CODE (x
) == CONST_DOUBLE
)
3528 val
= CONST_DOUBLE_LOW (x
);
3529 for (i
= 15; i
>= 8; i
--)
3531 arr
[i
] = val
& 0xff;
3534 val
= CONST_DOUBLE_HIGH (x
);
3535 for (i
= 7; i
>= 0; i
--)
3537 arr
[i
] = val
& 0xff;
3541 else if (GET_CODE (x
) == CONST_VECTOR
)
3545 mode
= GET_MODE_INNER (mode
);
3546 units
= CONST_VECTOR_NUNITS (x
);
3547 for (i
= 0; i
< units
; i
++)
3549 elt
= CONST_VECTOR_ELT (x
, i
);
3550 if (GET_CODE (elt
) == CONST_INT
|| GET_CODE (elt
) == CONST_DOUBLE
)
3552 if (GET_CODE (elt
) == CONST_DOUBLE
)
3553 val
= const_double_to_hwint (elt
);
3556 first
= GET_MODE_SIZE (mode
) - 1;
3557 if (first
+ i
* GET_MODE_SIZE (mode
) > 16)
3559 for (j
= first
; j
>= 0; j
--)
3561 arr
[j
+ i
* GET_MODE_SIZE (mode
)] = val
& 0xff;
3571 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
3572 smaller than 16 bytes, use the bytes that would represent that value
3573 in a register, e.g., for QImode return the value of arr[3]. */
3575 array_to_constant (enum machine_mode mode
, unsigned char arr
[16])
3577 enum machine_mode inner_mode
;
3579 int units
, size
, i
, j
, k
;
3582 if (GET_MODE_CLASS (mode
) == MODE_INT
3583 && GET_MODE_BITSIZE (mode
) <= HOST_BITS_PER_WIDE_INT
)
3585 j
= GET_MODE_SIZE (mode
);
3586 i
= j
< 4 ? 4 - j
: 0;
3587 for (val
= 0; i
< j
; i
++)
3588 val
= (val
<< 8) | arr
[i
];
3589 val
= trunc_int_for_mode (val
, mode
);
3590 return GEN_INT (val
);
3596 for (i
= high
= 0; i
< 8; i
++)
3597 high
= (high
<< 8) | arr
[i
];
3598 for (i
= 8, val
= 0; i
< 16; i
++)
3599 val
= (val
<< 8) | arr
[i
];
3600 return immed_double_const (val
, high
, TImode
);
3604 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3605 val
= trunc_int_for_mode (val
, SImode
);
3606 return hwint_to_const_double (val
, SFmode
);
3610 val
= (arr
[0] << 24) | (arr
[1] << 16) | (arr
[2] << 8) | arr
[3];
3612 val
|= (arr
[4] << 24) | (arr
[5] << 16) | (arr
[6] << 8) | arr
[7];
3613 return hwint_to_const_double (val
, DFmode
);
3616 if (!VECTOR_MODE_P (mode
))
3619 units
= GET_MODE_NUNITS (mode
);
3620 size
= GET_MODE_UNIT_SIZE (mode
);
3621 inner_mode
= GET_MODE_INNER (mode
);
3622 v
= rtvec_alloc (units
);
3624 for (k
= i
= 0; i
< units
; ++i
)
3627 for (j
= 0; j
< size
; j
++, k
++)
3628 val
= (val
<< 8) | arr
[k
];
3630 if (GET_MODE_CLASS (inner_mode
) == MODE_FLOAT
)
3631 RTVEC_ELT (v
, i
) = hwint_to_const_double (inner_mode
, val
);
3633 RTVEC_ELT (v
, i
) = GEN_INT (trunc_int_for_mode (val
, inner_mode
));
3638 return gen_rtx_CONST_VECTOR (mode
, v
);
3642 reloc_diagnostic (rtx x
)
3644 tree loc_decl
, decl
= 0;
3646 if (!flag_pic
|| !(TARGET_WARN_RELOC
|| TARGET_ERROR_RELOC
))
3649 if (GET_CODE (x
) == SYMBOL_REF
)
3650 decl
= SYMBOL_REF_DECL (x
);
3651 else if (GET_CODE (x
) == CONST
3652 && GET_CODE (XEXP (XEXP (x
, 0), 0)) == SYMBOL_REF
)
3653 decl
= SYMBOL_REF_DECL (XEXP (XEXP (x
, 0), 0));
3655 /* SYMBOL_REF_DECL is not necessarily a DECL. */
3656 if (decl
&& !DECL_P (decl
))
3659 /* We use last_assemble_variable_decl to get line information. It's
3660 not always going to be right and might not even be close, but will
3661 be right for the more common cases. */
3662 if (!last_assemble_variable_decl
)
3665 loc_decl
= last_assemble_variable_decl
;
3667 /* The decl could be a string constant. */
3668 if (decl
&& DECL_P (decl
))
3669 msg
= "%Jcreating run-time relocation for %qD";
3671 msg
= "creating run-time relocation";
3673 if (TARGET_WARN_RELOC
)
3674 warning (0, msg
, loc_decl
, decl
);
3676 error (msg
, loc_decl
, decl
);
3679 /* Hook into assemble_integer so we can generate an error for run-time
3680 relocations. The SPU ABI disallows them. */
3682 spu_assemble_integer (rtx x
, unsigned int size
, int aligned_p
)
3684 /* By default run-time relocations aren't supported, but we allow them
3685 in case users support it in their own run-time loader. And we provide
3686 a warning for those users that don't. */
3687 if ((GET_CODE (x
) == SYMBOL_REF
)
3688 || GET_CODE (x
) == LABEL_REF
|| GET_CODE (x
) == CONST
)
3689 reloc_diagnostic (x
);
3691 return default_assemble_integer (x
, size
, aligned_p
);
/* Output the assembler directive that makes NAME a global symbol.  */
static void
spu_asm_globalize_label (FILE * file, const char *name)
{
  fputs ("\t.global\t", file);
  assemble_name (file, name);
  fputs ("\n", file);
}
3703 spu_rtx_costs (rtx x
, int code
, int outer_code ATTRIBUTE_UNUSED
, int *total
)
3705 enum machine_mode mode
= GET_MODE (x
);
3706 int cost
= COSTS_N_INSNS (2);
3708 /* Folding to a CONST_VECTOR will use extra space but there might
3709 be only a small savings in cycles. We'd like to use a CONST_VECTOR
3710 only if it allows us to fold away multiple insns. Changing the cost
3711 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
3712 because this cost will only be compared against a single insn.
3713 if (code == CONST_VECTOR)
3714 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
3717 /* Use defaults for float operations. Not accurate but good enough. */
3720 *total
= COSTS_N_INSNS (13);
3725 *total
= COSTS_N_INSNS (6);
3731 if (satisfies_constraint_K (x
))
3733 else if (INTVAL (x
) >= -0x80000000ll
&& INTVAL (x
) <= 0xffffffffll
)
3734 *total
= COSTS_N_INSNS (1);
3736 *total
= COSTS_N_INSNS (3);
3740 *total
= COSTS_N_INSNS (3);
3745 *total
= COSTS_N_INSNS (0);
3749 *total
= COSTS_N_INSNS (5);
3753 case FLOAT_TRUNCATE
:
3755 case UNSIGNED_FLOAT
:
3758 *total
= COSTS_N_INSNS (7);
3764 *total
= COSTS_N_INSNS (9);
3771 GET_CODE (XEXP (x
, 0)) ==
3772 REG
? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
3773 if (mode
== SImode
&& GET_CODE (XEXP (x
, 0)) == REG
)
3775 if (GET_CODE (XEXP (x
, 1)) == CONST_INT
)
3777 HOST_WIDE_INT val
= INTVAL (XEXP (x
, 1));
3778 cost
= COSTS_N_INSNS (14);
3779 if ((val
& 0xffff) == 0)
3780 cost
= COSTS_N_INSNS (9);
3781 else if (val
> 0 && val
< 0x10000)
3782 cost
= COSTS_N_INSNS (11);
3791 *total
= COSTS_N_INSNS (20);
3798 *total
= COSTS_N_INSNS (4);
3801 if (XINT (x
, 1) == UNSPEC_CONVERT
)
3802 *total
= COSTS_N_INSNS (0);
3804 *total
= COSTS_N_INSNS (4);
3807 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
3808 if (GET_MODE_CLASS (mode
) == MODE_INT
3809 && GET_MODE_SIZE (mode
) > GET_MODE_SIZE (SImode
) && cfun
&& cfun
->decl
)
3810 cost
= cost
* (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
))
3811 * (GET_MODE_SIZE (mode
) / GET_MODE_SIZE (SImode
));
3817 spu_eh_return_filter_mode (void)
3819 /* We would like this to be SImode, but sjlj exceptions seems to work
3820 only with word_mode. */
3824 /* Decide whether we can make a sibling call to a function. DECL is the
3825 declaration of the function being targeted by the call and EXP is the
3826 CALL_EXPR representing the call. */
3828 spu_function_ok_for_sibcall (tree decl
, tree exp ATTRIBUTE_UNUSED
)
3830 return decl
&& !TARGET_LARGE_MEM
;
3833 /* We need to correctly update the back chain pointer and the Available
3834 Stack Size (which is in the second slot of the sp register.) */
3836 spu_allocate_stack (rtx op0
, rtx op1
)
3839 rtx chain
= gen_reg_rtx (V4SImode
);
3840 rtx stack_bot
= gen_frame_mem (V4SImode
, stack_pointer_rtx
);
3841 rtx sp
= gen_reg_rtx (V4SImode
);
3842 rtx splatted
= gen_reg_rtx (V4SImode
);
3843 rtx pat
= gen_reg_rtx (TImode
);
3845 /* copy the back chain so we can save it back again. */
3846 emit_move_insn (chain
, stack_bot
);
3848 op1
= force_reg (SImode
, op1
);
3850 v
= 0x1020300010203ll
;
3851 emit_move_insn (pat
, immed_double_const (v
, v
, TImode
));
3852 emit_insn (gen_shufb (splatted
, op1
, op1
, pat
));
3854 emit_insn (gen_spu_convert (sp
, stack_pointer_rtx
));
3855 emit_insn (gen_subv4si3 (sp
, sp
, splatted
));
3857 if (flag_stack_check
)
3859 rtx avail
= gen_reg_rtx(SImode
);
3860 rtx result
= gen_reg_rtx(SImode
);
3861 emit_insn (gen_vec_extractv4si (avail
, sp
, GEN_INT (1)));
3862 emit_insn (gen_cgt_si(result
, avail
, GEN_INT (-1)));
3863 emit_insn (gen_spu_heq (result
, GEN_INT(0) ));
3866 emit_insn (gen_spu_convert (stack_pointer_rtx
, sp
));
3868 emit_move_insn (stack_bot
, chain
);
3870 emit_move_insn (op0
, virtual_stack_dynamic_rtx
);
3874 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED
, rtx op1
)
3876 static unsigned char arr
[16] =
3877 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
3878 rtx temp
= gen_reg_rtx (SImode
);
3879 rtx temp2
= gen_reg_rtx (SImode
);
3880 rtx temp3
= gen_reg_rtx (V4SImode
);
3881 rtx temp4
= gen_reg_rtx (V4SImode
);
3882 rtx pat
= gen_reg_rtx (TImode
);
3883 rtx sp
= gen_rtx_REG (V4SImode
, STACK_POINTER_REGNUM
);
3885 /* Restore the backchain from the first word, sp from the second. */
3886 emit_move_insn (temp2
, adjust_address_nv (op1
, SImode
, 0));
3887 emit_move_insn (temp
, adjust_address_nv (op1
, SImode
, 4));
3889 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
3891 /* Compute Available Stack Size for sp */
3892 emit_insn (gen_subsi3 (temp
, temp
, stack_pointer_rtx
));
3893 emit_insn (gen_shufb (temp3
, temp
, temp
, pat
));
3895 /* Compute Available Stack Size for back chain */
3896 emit_insn (gen_subsi3 (temp2
, temp2
, stack_pointer_rtx
));
3897 emit_insn (gen_shufb (temp4
, temp2
, temp2
, pat
));
3898 emit_insn (gen_addv4si3 (temp4
, sp
, temp4
));
3900 emit_insn (gen_addv4si3 (sp
, sp
, temp3
));
3901 emit_move_insn (gen_frame_mem (V4SImode
, stack_pointer_rtx
), temp4
);
3905 spu_init_libfuncs (void)
3907 set_optab_libfunc (smul_optab
, DImode
, "__muldi3");
3908 set_optab_libfunc (sdiv_optab
, DImode
, "__divdi3");
3909 set_optab_libfunc (smod_optab
, DImode
, "__moddi3");
3910 set_optab_libfunc (udiv_optab
, DImode
, "__udivdi3");
3911 set_optab_libfunc (umod_optab
, DImode
, "__umoddi3");
3912 set_optab_libfunc (udivmod_optab
, DImode
, "__udivmoddi4");
3913 set_optab_libfunc (ffs_optab
, DImode
, "__ffsdi2");
3914 set_optab_libfunc (clz_optab
, DImode
, "__clzdi2");
3915 set_optab_libfunc (ctz_optab
, DImode
, "__ctzdi2");
3916 set_optab_libfunc (popcount_optab
, DImode
, "__popcountdi2");
3917 set_optab_libfunc (parity_optab
, DImode
, "__paritydi2");
3919 set_conv_libfunc (ufloat_optab
, DFmode
, SImode
, "__float_unssidf");
3920 set_conv_libfunc (ufloat_optab
, DFmode
, DImode
, "__float_unsdidf");
3923 /* Make a subreg, stripping any existing subreg. We could possibly just
3924 call simplify_subreg, but in this case we know what we want. */
3926 spu_gen_subreg (enum machine_mode mode
, rtx x
)
3928 if (GET_CODE (x
) == SUBREG
)
3930 if (GET_MODE (x
) == mode
)
3932 return gen_rtx_SUBREG (mode
, x
, 0);
3936 spu_return_in_memory (tree type
, tree fntype ATTRIBUTE_UNUSED
)
3938 return (TYPE_MODE (type
) == BLKmode
3940 || TREE_CODE (TYPE_SIZE (type
)) != INTEGER_CST
3941 || int_size_in_bytes (type
) >
3942 (MAX_REGISTER_RETURN
* UNITS_PER_WORD
)));
3945 /* Create the built-in types and functions */
3947 struct spu_builtin_description spu_builtins
[] = {
3948 #define DEF_BUILTIN(fcode, icode, name, type, params) \
3949 {fcode, icode, name, type, params, NULL_TREE},
3950 #include "spu-builtins.def"
3955 spu_init_builtins (void)
3957 struct spu_builtin_description
*d
;
3960 V16QI_type_node
= build_vector_type (intQI_type_node
, 16);
3961 V8HI_type_node
= build_vector_type (intHI_type_node
, 8);
3962 V4SI_type_node
= build_vector_type (intSI_type_node
, 4);
3963 V2DI_type_node
= build_vector_type (intDI_type_node
, 2);
3964 V4SF_type_node
= build_vector_type (float_type_node
, 4);
3965 V2DF_type_node
= build_vector_type (double_type_node
, 2);
3967 unsigned_V16QI_type_node
= build_vector_type (unsigned_intQI_type_node
, 16);
3968 unsigned_V8HI_type_node
= build_vector_type (unsigned_intHI_type_node
, 8);
3969 unsigned_V4SI_type_node
= build_vector_type (unsigned_intSI_type_node
, 4);
3970 unsigned_V2DI_type_node
= build_vector_type (unsigned_intDI_type_node
, 2);
3972 spu_builtin_types
[SPU_BTI_QUADWORD
] = V16QI_type_node
;
3974 spu_builtin_types
[SPU_BTI_7
] = global_trees
[TI_INTSI_TYPE
];
3975 spu_builtin_types
[SPU_BTI_S7
] = global_trees
[TI_INTSI_TYPE
];
3976 spu_builtin_types
[SPU_BTI_U7
] = global_trees
[TI_INTSI_TYPE
];
3977 spu_builtin_types
[SPU_BTI_S10
] = global_trees
[TI_INTSI_TYPE
];
3978 spu_builtin_types
[SPU_BTI_S10_4
] = global_trees
[TI_INTSI_TYPE
];
3979 spu_builtin_types
[SPU_BTI_U14
] = global_trees
[TI_INTSI_TYPE
];
3980 spu_builtin_types
[SPU_BTI_16
] = global_trees
[TI_INTSI_TYPE
];
3981 spu_builtin_types
[SPU_BTI_S16
] = global_trees
[TI_INTSI_TYPE
];
3982 spu_builtin_types
[SPU_BTI_S16_2
] = global_trees
[TI_INTSI_TYPE
];
3983 spu_builtin_types
[SPU_BTI_U16
] = global_trees
[TI_INTSI_TYPE
];
3984 spu_builtin_types
[SPU_BTI_U16_2
] = global_trees
[TI_INTSI_TYPE
];
3985 spu_builtin_types
[SPU_BTI_U18
] = global_trees
[TI_INTSI_TYPE
];
3987 spu_builtin_types
[SPU_BTI_INTQI
] = global_trees
[TI_INTQI_TYPE
];
3988 spu_builtin_types
[SPU_BTI_INTHI
] = global_trees
[TI_INTHI_TYPE
];
3989 spu_builtin_types
[SPU_BTI_INTSI
] = global_trees
[TI_INTSI_TYPE
];
3990 spu_builtin_types
[SPU_BTI_INTDI
] = global_trees
[TI_INTDI_TYPE
];
3991 spu_builtin_types
[SPU_BTI_UINTQI
] = global_trees
[TI_UINTQI_TYPE
];
3992 spu_builtin_types
[SPU_BTI_UINTHI
] = global_trees
[TI_UINTHI_TYPE
];
3993 spu_builtin_types
[SPU_BTI_UINTSI
] = global_trees
[TI_UINTSI_TYPE
];
3994 spu_builtin_types
[SPU_BTI_UINTDI
] = global_trees
[TI_UINTDI_TYPE
];
3996 spu_builtin_types
[SPU_BTI_FLOAT
] = global_trees
[TI_FLOAT_TYPE
];
3997 spu_builtin_types
[SPU_BTI_DOUBLE
] = global_trees
[TI_DOUBLE_TYPE
];
3999 spu_builtin_types
[SPU_BTI_VOID
] = global_trees
[TI_VOID_TYPE
];
4001 spu_builtin_types
[SPU_BTI_PTR
] =
4002 build_pointer_type (build_qualified_type
4004 TYPE_QUAL_CONST
| TYPE_QUAL_VOLATILE
));
4006 /* For each builtin we build a new prototype. The tree code will make
4007 sure nodes are shared. */
4008 for (i
= 0, d
= spu_builtins
; i
< NUM_SPU_BUILTINS
; i
++, d
++)
4011 char name
[64]; /* build_function will make a copy. */
4017 /* find last parm */
4018 for (parm
= 1; d
->parm
[parm
] != SPU_BTI_END_OF_PARAMS
; parm
++)
4024 p
= tree_cons (NULL_TREE
, spu_builtin_types
[d
->parm
[--parm
]], p
);
4026 p
= build_function_type (spu_builtin_types
[d
->parm
[0]], p
);
4028 sprintf (name
, "__builtin_%s", d
->name
);
4030 add_builtin_function (name
, p
, END_BUILTINS
+ i
, BUILT_IN_MD
,
4036 spu_safe_dma (HOST_WIDE_INT channel
)
4038 return (channel
>= 21 && channel
<= 27);
4042 spu_builtin_splats (rtx ops
[])
4044 enum machine_mode mode
= GET_MODE (ops
[0]);
4045 if (GET_CODE (ops
[1]) == CONST_INT
|| GET_CODE (ops
[1]) == CONST_DOUBLE
)
4047 unsigned char arr
[16];
4048 constant_to_array (GET_MODE_INNER (mode
), ops
[1], arr
);
4049 emit_move_insn (ops
[0], array_to_constant (mode
, arr
));
4051 else if (GET_MODE (ops
[0]) == V4SImode
&& CONSTANT_P (ops
[1]))
4053 rtvec v
= rtvec_alloc (4);
4054 RTVEC_ELT (v
, 0) = ops
[1];
4055 RTVEC_ELT (v
, 1) = ops
[1];
4056 RTVEC_ELT (v
, 2) = ops
[1];
4057 RTVEC_ELT (v
, 3) = ops
[1];
4058 emit_move_insn (ops
[0], gen_rtx_CONST_VECTOR (mode
, v
));
4062 rtx reg
= gen_reg_rtx (TImode
);
4064 if (GET_CODE (ops
[1]) != REG
4065 && GET_CODE (ops
[1]) != SUBREG
)
4066 ops
[1] = force_reg (GET_MODE_INNER (mode
), ops
[1]);
4072 immed_double_const (0x0001020304050607ll
, 0x1011121314151617ll
,
4078 immed_double_const (0x0001020300010203ll
, 0x0001020300010203ll
,
4083 immed_double_const (0x0203020302030203ll
, 0x0203020302030203ll
,
4088 immed_double_const (0x0303030303030303ll
, 0x0303030303030303ll
,
4094 emit_move_insn (reg
, shuf
);
4095 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[1], reg
));
4100 spu_builtin_extract (rtx ops
[])
4102 enum machine_mode mode
;
4105 mode
= GET_MODE (ops
[1]);
4107 if (GET_CODE (ops
[2]) == CONST_INT
)
4112 emit_insn (gen_vec_extractv16qi (ops
[0], ops
[1], ops
[2]));
4115 emit_insn (gen_vec_extractv8hi (ops
[0], ops
[1], ops
[2]));
4118 emit_insn (gen_vec_extractv4sf (ops
[0], ops
[1], ops
[2]));
4121 emit_insn (gen_vec_extractv4si (ops
[0], ops
[1], ops
[2]));
4124 emit_insn (gen_vec_extractv2di (ops
[0], ops
[1], ops
[2]));
4127 emit_insn (gen_vec_extractv2df (ops
[0], ops
[1], ops
[2]));
4135 from
= spu_gen_subreg (TImode
, ops
[1]);
4136 rot
= gen_reg_rtx (TImode
);
4137 tmp
= gen_reg_rtx (SImode
);
4142 emit_insn (gen_addsi3 (tmp
, ops
[2], GEN_INT (-3)));
4145 emit_insn (gen_addsi3 (tmp
, ops
[2], ops
[2]));
4146 emit_insn (gen_addsi3 (tmp
, tmp
, GEN_INT (-2)));
4150 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (2)));
4154 emit_insn (gen_ashlsi3 (tmp
, ops
[2], GEN_INT (3)));
4159 emit_insn (gen_rotqby_ti (rot
, from
, tmp
));
4161 emit_insn (gen_spu_convert (ops
[0], rot
));
4165 spu_builtin_insert (rtx ops
[])
4167 enum machine_mode mode
= GET_MODE (ops
[0]);
4168 enum machine_mode imode
= GET_MODE_INNER (mode
);
4169 rtx mask
= gen_reg_rtx (TImode
);
4172 if (GET_CODE (ops
[3]) == CONST_INT
)
4173 offset
= GEN_INT (INTVAL (ops
[3]) * GET_MODE_SIZE (imode
));
4176 offset
= gen_reg_rtx (SImode
);
4177 emit_insn (gen_mulsi3
4178 (offset
, ops
[3], GEN_INT (GET_MODE_SIZE (imode
))));
4181 (mask
, stack_pointer_rtx
, offset
,
4182 GEN_INT (GET_MODE_SIZE (imode
))));
4183 emit_insn (gen_shufb (ops
[0], ops
[1], ops
[2], mask
));
4187 spu_builtin_promote (rtx ops
[])
4189 enum machine_mode mode
, imode
;
4190 rtx rot
, from
, offset
;
4193 mode
= GET_MODE (ops
[0]);
4194 imode
= GET_MODE_INNER (mode
);
4196 from
= gen_reg_rtx (TImode
);
4197 rot
= spu_gen_subreg (TImode
, ops
[0]);
4199 emit_insn (gen_spu_convert (from
, ops
[1]));
4201 if (GET_CODE (ops
[2]) == CONST_INT
)
4203 pos
= -GET_MODE_SIZE (imode
) * INTVAL (ops
[2]);
4204 if (GET_MODE_SIZE (imode
) < 4)
4205 pos
+= 4 - GET_MODE_SIZE (imode
);
4206 offset
= GEN_INT (pos
& 15);
4210 offset
= gen_reg_rtx (SImode
);
4214 emit_insn (gen_subsi3 (offset
, GEN_INT (3), ops
[2]));
4217 emit_insn (gen_subsi3 (offset
, GEN_INT (1), ops
[2]));
4218 emit_insn (gen_addsi3 (offset
, offset
, offset
));
4222 emit_insn (gen_subsi3 (offset
, GEN_INT (0), ops
[2]));
4223 emit_insn (gen_ashlsi3 (offset
, offset
, GEN_INT (2)));
4227 emit_insn (gen_ashlsi3 (offset
, ops
[2], GEN_INT (3)));
4233 emit_insn (gen_rotqby_ti (rot
, from
, offset
));
4237 spu_initialize_trampoline (rtx tramp
, rtx fnaddr
, rtx cxt
)
4239 rtx shuf
= gen_reg_rtx (V4SImode
);
4240 rtx insn
= gen_reg_rtx (V4SImode
);
4245 fnaddr
= force_reg (SImode
, fnaddr
);
4246 cxt
= force_reg (SImode
, cxt
);
4248 if (TARGET_LARGE_MEM
)
4250 rtx rotl
= gen_reg_rtx (V4SImode
);
4251 rtx mask
= gen_reg_rtx (V4SImode
);
4252 rtx bi
= gen_reg_rtx (SImode
);
4253 unsigned char shufa
[16] = {
4254 2, 3, 0, 1, 18, 19, 16, 17,
4255 0, 1, 2, 3, 16, 17, 18, 19
4257 unsigned char insna
[16] = {
4259 0x41, 0, 0, STATIC_CHAIN_REGNUM
,
4261 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4264 shufc
= force_reg (TImode
, array_to_constant (TImode
, shufa
));
4265 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
4267 emit_insn (gen_shufb (shuf
, fnaddr
, cxt
, shufc
));
4268 emit_insn (gen_rotlv4si3 (rotl
, shuf
, spu_const (V4SImode
, 7)));
4269 emit_insn (gen_movv4si (mask
, spu_const (V4SImode
, 0xffff << 7)));
4270 emit_insn (gen_selb (insn
, insnc
, rotl
, mask
));
4272 mem
= memory_address (Pmode
, tramp
);
4273 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
4275 emit_move_insn (bi
, GEN_INT (0x35000000 + (79 << 7)));
4276 mem
= memory_address (Pmode
, plus_constant (tramp
, 16));
4277 emit_move_insn (gen_rtx_MEM (Pmode
, mem
), bi
);
4281 rtx scxt
= gen_reg_rtx (SImode
);
4282 rtx sfnaddr
= gen_reg_rtx (SImode
);
4283 unsigned char insna
[16] = {
4284 0x42, 0, 0, STATIC_CHAIN_REGNUM
,
4290 shufc
= gen_reg_rtx (TImode
);
4291 insnc
= force_reg (V4SImode
, array_to_constant (V4SImode
, insna
));
4293 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4294 fits 18 bits and the last 4 are zeros. This will be true if
4295 the stack pointer is initialized to 0x3fff0 at program start,
4296 otherwise the ila instruction will be garbage. */
4298 emit_insn (gen_ashlsi3 (scxt
, cxt
, GEN_INT (7)));
4299 emit_insn (gen_ashlsi3 (sfnaddr
, fnaddr
, GEN_INT (5)));
4301 (shufc
, stack_pointer_rtx
, GEN_INT (4), GEN_INT (4)));
4302 emit_insn (gen_shufb (shuf
, sfnaddr
, scxt
, shufc
));
4303 emit_insn (gen_iorv4si3 (insn
, insnc
, shuf
));
4305 mem
= memory_address (Pmode
, tramp
);
4306 emit_move_insn (gen_rtx_MEM (V4SImode
, mem
), insn
);
4309 emit_insn (gen_sync ());
4313 spu_expand_sign_extend (rtx ops
[])
4315 unsigned char arr
[16];
4316 rtx pat
= gen_reg_rtx (TImode
);
4319 last
= GET_MODE (ops
[0]) == DImode
? 7 : 15;
4320 if (GET_MODE (ops
[1]) == QImode
)
4322 sign
= gen_reg_rtx (HImode
);
4323 emit_insn (gen_extendqihi2 (sign
, ops
[1]));
4324 for (i
= 0; i
< 16; i
++)
4330 for (i
= 0; i
< 16; i
++)
4332 switch (GET_MODE (ops
[1]))
4335 sign
= gen_reg_rtx (SImode
);
4336 emit_insn (gen_extendhisi2 (sign
, ops
[1]));
4338 arr
[last
- 1] = 0x02;
4341 sign
= gen_reg_rtx (SImode
);
4342 emit_insn (gen_ashrsi3 (sign
, ops
[1], GEN_INT (31)));
4343 for (i
= 0; i
< 4; i
++)
4344 arr
[last
- i
] = 3 - i
;
4347 sign
= gen_reg_rtx (SImode
);
4348 c
= gen_reg_rtx (SImode
);
4349 emit_insn (gen_spu_convert (c
, ops
[1]));
4350 emit_insn (gen_ashrsi3 (sign
, c
, GEN_INT (31)));
4351 for (i
= 0; i
< 8; i
++)
4352 arr
[last
- i
] = 7 - i
;
4358 emit_move_insn (pat
, array_to_constant (TImode
, arr
));
4359 emit_insn (gen_shufb (ops
[0], ops
[1], sign
, pat
));
4362 /* expand vector initialization. If there are any constant parts,
4363 load constant parts first. Then load any non-constant parts. */
4365 spu_expand_vector_init (rtx target
, rtx vals
)
4367 enum machine_mode mode
= GET_MODE (target
);
4368 int n_elts
= GET_MODE_NUNITS (mode
);
4370 bool all_same
= true;
4371 rtx first
, x
= NULL_RTX
, first_constant
= NULL_RTX
;
4374 first
= XVECEXP (vals
, 0, 0);
4375 for (i
= 0; i
< n_elts
; ++i
)
4377 x
= XVECEXP (vals
, 0, i
);
4378 if (!CONSTANT_P (x
))
4382 if (first_constant
== NULL_RTX
)
4385 if (i
> 0 && !rtx_equal_p (x
, first
))
4389 /* if all elements are the same, use splats to repeat elements */
4392 if (!CONSTANT_P (first
)
4393 && !register_operand (first
, GET_MODE (x
)))
4394 first
= force_reg (GET_MODE (first
), first
);
4395 emit_insn (gen_spu_splats (target
, first
));
4399 /* load constant parts */
4400 if (n_var
!= n_elts
)
4404 emit_move_insn (target
,
4405 gen_rtx_CONST_VECTOR (mode
, XVEC (vals
, 0)));
4409 rtx constant_parts_rtx
= copy_rtx (vals
);
4411 gcc_assert (first_constant
!= NULL_RTX
);
4412 /* fill empty slots with the first constant, this increases
4413 our chance of using splats in the recursive call below. */
4414 for (i
= 0; i
< n_elts
; ++i
)
4415 if (!CONSTANT_P (XVECEXP (constant_parts_rtx
, 0, i
)))
4416 XVECEXP (constant_parts_rtx
, 0, i
) = first_constant
;
4418 spu_expand_vector_init (target
, constant_parts_rtx
);
4422 /* load variable parts */
4425 rtx insert_operands
[4];
4427 insert_operands
[0] = target
;
4428 insert_operands
[2] = target
;
4429 for (i
= 0; i
< n_elts
; ++i
)
4431 x
= XVECEXP (vals
, 0, i
);
4432 if (!CONSTANT_P (x
))
4434 if (!register_operand (x
, GET_MODE (x
)))
4435 x
= force_reg (GET_MODE (x
), x
);
4436 insert_operands
[1] = x
;
4437 insert_operands
[3] = GEN_INT (i
);
4438 spu_builtin_insert (insert_operands
);
4445 spu_force_reg (enum machine_mode mode
, rtx op
)
4448 if (GET_MODE (op
) == VOIDmode
|| GET_MODE (op
) == BLKmode
)
4450 if ((SCALAR_INT_MODE_P (mode
) && GET_CODE (op
) == CONST_INT
)
4451 || GET_MODE (op
) == BLKmode
)
4452 return force_reg (mode
, convert_to_mode (mode
, op
, 0));
4456 r
= force_reg (GET_MODE (op
), op
);
4457 if (GET_MODE_SIZE (GET_MODE (op
)) == GET_MODE_SIZE (mode
))
4459 x
= simplify_gen_subreg (mode
, r
, GET_MODE (op
), 0);
4464 x
= gen_reg_rtx (mode
);
4465 emit_insn (gen_spu_convert (x
, r
));
4470 spu_check_builtin_parm (struct spu_builtin_description
*d
, rtx op
, int p
)
4472 HOST_WIDE_INT v
= 0;
4474 /* Check the range of immediate operands. */
4475 if (p
>= SPU_BTI_7
&& p
<= SPU_BTI_U18
)
4477 int range
= p
- SPU_BTI_7
;
4478 if (!CONSTANT_P (op
)
4479 || (GET_CODE (op
) == CONST_INT
4480 && (INTVAL (op
) < spu_builtin_range
[range
].low
4481 || INTVAL (op
) > spu_builtin_range
[range
].high
)))
4482 error ("%s expects an integer literal in the range [%d, %d].",
4484 spu_builtin_range
[range
].low
, spu_builtin_range
[range
].high
);
4486 if (GET_CODE (op
) == CONST
4487 && (GET_CODE (XEXP (op
, 0)) == PLUS
4488 || GET_CODE (XEXP (op
, 0)) == MINUS
))
4490 v
= INTVAL (XEXP (XEXP (op
, 0), 1));
4491 op
= XEXP (XEXP (op
, 0), 0);
4493 else if (GET_CODE (op
) == CONST_INT
)
4502 /* This is only used in lqa, and stqa. Even though the insns
4503 encode 16 bits of the address (all but the 2 least
4504 significant), only 14 bits are used because it is masked to
4505 be 16 byte aligned. */
4509 /* This is used for lqr and stqr. */
4516 if (GET_CODE (op
) == LABEL_REF
4517 || (GET_CODE (op
) == SYMBOL_REF
4518 && SYMBOL_REF_FUNCTION_P (op
))
4519 || (INTVAL (op
) & ((1 << lsbits
) - 1)) != 0)
4520 warning (0, "%d least significant bits of %s are ignored.", lsbits
,
4527 expand_builtin_args (struct spu_builtin_description
*d
, tree arglist
,
4528 rtx target
, rtx ops
[])
4530 enum insn_code icode
= d
->icode
;
4533 /* Expand the arguments into rtl. */
4535 if (d
->parm
[0] != SPU_BTI_VOID
)
4538 for (; i
< insn_data
[icode
].n_operands
; i
++)
4540 tree arg
= TREE_VALUE (arglist
);
4543 ops
[i
] = expand_expr (arg
, NULL_RTX
, VOIDmode
, 0);
4544 arglist
= TREE_CHAIN (arglist
);
4549 spu_expand_builtin_1 (struct spu_builtin_description
*d
,
4550 tree arglist
, rtx target
)
4554 enum insn_code icode
= d
->icode
;
4555 enum machine_mode mode
, tmode
;
4559 /* Set up ops[] with values from arglist. */
4560 expand_builtin_args (d
, arglist
, target
, ops
);
4562 /* Handle the target operand which must be operand 0. */
4564 if (d
->parm
[0] != SPU_BTI_VOID
)
4567 /* We prefer the mode specified for the match_operand otherwise
4568 use the mode from the builtin function prototype. */
4569 tmode
= insn_data
[d
->icode
].operand
[0].mode
;
4570 if (tmode
== VOIDmode
)
4571 tmode
= TYPE_MODE (spu_builtin_types
[d
->parm
[0]]);
4573 /* Try to use target because not using it can lead to extra copies
4574 and when we are using all of the registers extra copies leads
4576 if (target
&& GET_CODE (target
) == REG
&& GET_MODE (target
) == tmode
)
4579 target
= ops
[0] = gen_reg_rtx (tmode
);
4581 if (!(*insn_data
[icode
].operand
[0].predicate
) (ops
[0], tmode
))
4587 /* Ignore align_hint, but still expand it's args in case they have
4589 if (icode
== CODE_FOR_spu_align_hint
)
4592 /* Handle the rest of the operands. */
4593 for (p
= 1; i
< insn_data
[icode
].n_operands
; i
++, p
++)
4595 if (insn_data
[d
->icode
].operand
[i
].mode
!= VOIDmode
)
4596 mode
= insn_data
[d
->icode
].operand
[i
].mode
;
4598 mode
= TYPE_MODE (spu_builtin_types
[d
->parm
[i
]]);
4600 /* mode can be VOIDmode here for labels */
4602 /* For specific intrinsics with an immediate operand, e.g.,
4603 si_ai(), we sometimes need to convert the scalar argument to a
4604 vector argument by splatting the scalar. */
4605 if (VECTOR_MODE_P (mode
)
4606 && (GET_CODE (ops
[i
]) == CONST_INT
4607 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_INT
4608 || GET_MODE_CLASS (GET_MODE (ops
[i
])) == MODE_FLOAT
))
4610 if (GET_CODE (ops
[i
]) == CONST_INT
)
4611 ops
[i
] = spu_const (mode
, INTVAL (ops
[i
]));
4614 rtx reg
= gen_reg_rtx (mode
);
4615 enum machine_mode imode
= GET_MODE_INNER (mode
);
4616 if (!spu_nonmem_operand (ops
[i
], GET_MODE (ops
[i
])))
4617 ops
[i
] = force_reg (GET_MODE (ops
[i
]), ops
[i
]);
4618 if (imode
!= GET_MODE (ops
[i
]))
4619 ops
[i
] = convert_to_mode (imode
, ops
[i
],
4620 TYPE_UNSIGNED (spu_builtin_types
4622 emit_insn (gen_spu_splats (reg
, ops
[i
]));
4627 if (!(*insn_data
[icode
].operand
[i
].predicate
) (ops
[i
], mode
))
4628 ops
[i
] = spu_force_reg (mode
, ops
[i
]);
4630 spu_check_builtin_parm (d
, ops
[i
], d
->parm
[p
]);
4633 switch (insn_data
[icode
].n_operands
)
4636 pat
= GEN_FCN (icode
) (0);
4639 pat
= GEN_FCN (icode
) (ops
[0]);
4642 pat
= GEN_FCN (icode
) (ops
[0], ops
[1]);
4645 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2]);
4648 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3]);
4651 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4]);
4654 pat
= GEN_FCN (icode
) (ops
[0], ops
[1], ops
[2], ops
[3], ops
[4], ops
[5]);
4663 if (d
->type
== B_CALL
|| d
->type
== B_BISLED
)
4664 emit_call_insn (pat
);
4665 else if (d
->type
== B_JUMP
)
4667 emit_jump_insn (pat
);
4673 return_type
= spu_builtin_types
[d
->parm
[0]];
4674 if (d
->parm
[0] != SPU_BTI_VOID
4675 && GET_MODE (target
) != TYPE_MODE (return_type
))
4677 /* target is the return value. It should always be the mode of
4678 the builtin function prototype. */
4679 target
= spu_force_reg (TYPE_MODE (return_type
), target
);
4686 spu_expand_builtin (tree exp
,
4688 rtx subtarget ATTRIBUTE_UNUSED
,
4689 enum machine_mode mode ATTRIBUTE_UNUSED
,
4690 int ignore ATTRIBUTE_UNUSED
)
4692 tree fndecl
= TREE_OPERAND (TREE_OPERAND (exp
, 0), 0);
4693 unsigned int fcode
= DECL_FUNCTION_CODE (fndecl
) - END_BUILTINS
;
4694 tree arglist
= TREE_OPERAND (exp
, 1);
4695 struct spu_builtin_description
*d
;
4697 if (fcode
< NUM_SPU_BUILTINS
)
4699 d
= &spu_builtins
[fcode
];
4701 return spu_expand_builtin_1 (d
, arglist
, target
);